github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/mvcc_incremental_iterator_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package storage
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"math"
    18  	"path/filepath"
    19  	"testing"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/keys"
    22  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    23  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    24  	"github.com/cockroachdb/cockroach/pkg/testutils"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    27  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    28  	"github.com/cockroachdb/errors"
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/require"
    31  	"golang.org/x/sync/errgroup"
    32  )
    33  
// all and latest are the two values passed as the `revisions` argument of the
// helpers in this file. The iteration helpers step with Next when it is true
// (all) and with NextKey when it is false (latest).
const all, latest = true, false
    35  
    36  func iterateExpectErr(
    37  	e Engine,
    38  	startKey, endKey roachpb.Key,
    39  	startTime, endTime hlc.Timestamp,
    40  	revisions bool,
    41  	errString string,
    42  ) func(*testing.T) {
    43  	return func(t *testing.T) {
    44  		t.Helper()
    45  		iter := NewMVCCIncrementalIterator(e, MVCCIncrementalIterOptions{
    46  			IterOptions: IterOptions{
    47  				UpperBound: endKey,
    48  			},
    49  			StartTime: startTime,
    50  			EndTime:   endTime,
    51  		})
    52  		defer iter.Close()
    53  		var iterFn func()
    54  		if revisions {
    55  			iterFn = iter.Next
    56  		} else {
    57  			iterFn = iter.NextKey
    58  		}
    59  		for iter.SeekGE(MakeMVCCMetadataKey(startKey)); ; iterFn() {
    60  			if ok, _ := iter.Valid(); !ok || iter.UnsafeKey().Key.Compare(endKey) >= 0 {
    61  				break
    62  			}
    63  			// pass
    64  		}
    65  		if _, err := iter.Valid(); !testutils.IsError(err, errString) {
    66  			t.Fatalf("expected error %q but got %v", errString, err)
    67  		}
    68  	}
    69  }
    70  
    71  func assertExportedKVs(
    72  	t *testing.T,
    73  	e Engine,
    74  	startKey, endKey roachpb.Key,
    75  	startTime, endTime hlc.Timestamp,
    76  	revisions bool,
    77  	io IterOptions,
    78  	expected []MVCCKeyValue,
    79  ) {
    80  	const big = 1 << 30
    81  	data, _, _, err := e.ExportToSst(startKey, endKey, startTime, endTime, revisions, big, big, io)
    82  	require.NoError(t, err)
    83  
    84  	if data == nil {
    85  		require.Nil(t, expected)
    86  		return
    87  	}
    88  	sst, err := NewMemSSTIterator(data, false)
    89  	require.NoError(t, err)
    90  	defer sst.Close()
    91  
    92  	sst.SeekGE(MVCCKey{})
    93  	for i := range expected {
    94  		ok, err := sst.Valid()
    95  		require.NoError(t, err)
    96  		require.Truef(t, ok, "iteration produced %d keys, expected %d", i, len(expected))
    97  		assert.Equalf(t, expected[i].Key, sst.UnsafeKey(), "key %d", i)
    98  		if expected[i].Value == nil {
    99  			assert.Equalf(t, []byte{}, sst.UnsafeValue(), "key %d %q", i, sst.UnsafeKey())
   100  		} else {
   101  			assert.Equalf(t, expected[i].Value, sst.UnsafeValue(), "key %d %q", i, sst.UnsafeKey())
   102  		}
   103  		sst.Next()
   104  	}
   105  	ok, err := sst.Valid()
   106  	require.NoError(t, err)
   107  	require.False(t, ok)
   108  }
   109  
   110  func assertIteratedKVs(
   111  	t *testing.T,
   112  	e Engine,
   113  	startKey, endKey roachpb.Key,
   114  	startTime, endTime hlc.Timestamp,
   115  	revisions bool,
   116  	io IterOptions,
   117  	expected []MVCCKeyValue,
   118  ) {
   119  	iter := NewMVCCIncrementalIterator(e, MVCCIncrementalIterOptions{
   120  		IterOptions: io,
   121  		StartTime:   startTime,
   122  		EndTime:     endTime,
   123  	})
   124  	defer iter.Close()
   125  	var iterFn func()
   126  	if revisions {
   127  		iterFn = iter.Next
   128  	} else {
   129  		iterFn = iter.NextKey
   130  	}
   131  	var kvs []MVCCKeyValue
   132  	for iter.SeekGE(MakeMVCCMetadataKey(startKey)); ; iterFn() {
   133  		if ok, err := iter.Valid(); err != nil {
   134  			t.Fatalf("unexpected error: %+v", err)
   135  		} else if !ok || iter.UnsafeKey().Key.Compare(endKey) >= 0 {
   136  			break
   137  		}
   138  		kvs = append(kvs, MVCCKeyValue{Key: iter.Key(), Value: iter.Value()})
   139  	}
   140  
   141  	if len(kvs) != len(expected) {
   142  		t.Fatalf("got %d kvs but expected %d: %v", len(kvs), len(expected), kvs)
   143  	}
   144  	for i := range kvs {
   145  		if !kvs[i].Key.Equal(expected[i].Key) {
   146  			t.Fatalf("%d key: got %v but expected %v", i, kvs[i].Key, expected[i].Key)
   147  		}
   148  		if !bytes.Equal(kvs[i].Value, expected[i].Value) {
   149  			t.Fatalf("%d value: got %x but expected %x", i, kvs[i].Value, expected[i].Value)
   150  		}
   151  	}
   152  }
   153  
   154  func assertEqualKVs(
   155  	e Engine,
   156  	startKey, endKey roachpb.Key,
   157  	startTime, endTime hlc.Timestamp,
   158  	revisions bool,
   159  	expected []MVCCKeyValue,
   160  ) func(*testing.T) {
   161  	return func(t *testing.T) {
   162  		t.Helper()
   163  		io := IterOptions{UpperBound: endKey}
   164  		t.Run("iterate", func(t *testing.T) {
   165  			assertIteratedKVs(t, e, startKey, endKey, startTime, endTime, revisions, io, expected)
   166  		})
   167  		t.Run("iterate-tbi", func(t *testing.T) {
   168  			io := io
   169  			io.MinTimestampHint = startTime.Next()
   170  			io.MaxTimestampHint = endTime
   171  			assertIteratedKVs(t, e, startKey, endKey, startTime, endTime, revisions, io, expected)
   172  		})
   173  
   174  		t.Run("export", func(t *testing.T) {
   175  			assertExportedKVs(t, e, startKey, endKey, startTime, endTime, revisions, io, expected)
   176  		})
   177  		t.Run("export-tbi", func(t *testing.T) {
   178  			io := io
   179  			io.MinTimestampHint = startTime.Next()
   180  			io.MaxTimestampHint = endTime
   181  			assertExportedKVs(t, e, startKey, endKey, startTime, endTime, revisions, io, expected)
   182  		})
   183  	}
   184  }
   185  
// TestMVCCIncrementalIterator runs the same scenario against every engine in
// mvccEngineImpls twice: first with revisions=latest, then with
// revisions=all. Each pass writes a few versions of two keys, deletes one of
// them, lays down two intents and finally resolves them (one committed, one
// aborted), checking via assertEqualKVs / iterateExpectErr what is visible
// for various key and time ranges after each step.
func TestMVCCIncrementalIterator(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	var (
		keyMin   = roachpb.KeyMin
		keyMax   = roachpb.KeyMax
		testKey1 = roachpb.Key("/db1")
		testKey2 = roachpb.Key("/db2")

		testValue1 = []byte("val1")
		testValue2 = []byte("val2")
		testValue3 = []byte("val3")
		testValue4 = []byte("val4")

		// Use a non-zero min, since we use IsEmpty to decide if a ts should be used
		// as upper/lower-bound during iterator initialization.
		tsMin = hlc.Timestamp{WallTime: 0, Logical: 1}
		ts1   = hlc.Timestamp{WallTime: 1, Logical: 0}
		ts2   = hlc.Timestamp{WallTime: 2, Logical: 0}
		ts3   = hlc.Timestamp{WallTime: 3, Logical: 0}
		ts4   = hlc.Timestamp{WallTime: 4, Logical: 0}
		tsMax = hlc.Timestamp{WallTime: math.MaxInt64, Logical: 0}
	)

	// makeKVT builds an MVCCKeyValue for key at timestamp ts holding value.
	makeKVT := func(key roachpb.Key, value []byte, ts hlc.Timestamp) MVCCKeyValue {
		return MVCCKeyValue{Key: MVCCKey{Key: key, Timestamp: ts}, Value: value}
	}

	// Expected entries. NB: kv2_2_2 carries testValue3, and kv1_3Deleted has a
	// nil value (it is what the MVCCDelete at ts3 below is expected to yield).
	kv1_1_1 := makeKVT(testKey1, testValue1, ts1)
	kv1_4_4 := makeKVT(testKey1, testValue4, ts4)
	kv1_2_2 := makeKVT(testKey1, testValue2, ts2)
	kv2_2_2 := makeKVT(testKey2, testValue3, ts2)
	kv1_3Deleted := makeKVT(testKey1, nil, ts3)
	// kvs is shorthand for building a slice of expected entries; with no
	// arguments it returns a nil slice.
	kvs := func(kvs ...MVCCKeyValue) []MVCCKeyValue { return kvs }

	// First pass: revisions=latest.
	for _, engineImpl := range mvccEngineImpls {
		t.Run(engineImpl.name, func(t *testing.T) {
			e := engineImpl.create()
			defer e.Close()

			t.Run("empty", assertEqualKVs(e, keyMin, keyMax, tsMin, ts3, latest, nil))

			for _, kv := range kvs(kv1_1_1, kv1_2_2, kv2_2_2) {
				v := roachpb.Value{RawBytes: kv.Value}
				if err := MVCCPut(ctx, e, nil, kv.Key.Key, kv.Key.Timestamp, v, nil); err != nil {
					t.Fatal(err)
				}
			}

			// Exercise time ranges.
			t.Run("ts (0-0]", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMin, latest, nil))
			t.Run("ts (0-1]", assertEqualKVs(e, keyMin, keyMax, tsMin, ts1, latest, kvs(kv1_1_1)))
			t.Run("ts (0-∞]", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMax, latest, kvs(kv1_2_2, kv2_2_2)))
			t.Run("ts (1-1]", assertEqualKVs(e, keyMin, keyMax, ts1, ts1, latest, nil))
			t.Run("ts (1-2]", assertEqualKVs(e, keyMin, keyMax, ts1, ts2, latest, kvs(kv1_2_2, kv2_2_2)))
			t.Run("ts (2-2]", assertEqualKVs(e, keyMin, keyMax, ts2, ts2, latest, nil))

			// Exercise key ranges.
			t.Run("kv [1-1)", assertEqualKVs(e, testKey1, testKey1, tsMin, tsMax, latest, nil))
			t.Run("kv [1-2)", assertEqualKVs(e, testKey1, testKey2, tsMin, tsMax, latest, kvs(kv1_2_2)))

			// Exercise deletion.
			if err := MVCCDelete(ctx, e, nil, testKey1, ts3, nil); err != nil {
				t.Fatal(err)
			}
			t.Run("del", assertEqualKVs(e, keyMin, keyMax, ts1, tsMax, latest, kvs(kv1_3Deleted, kv2_2_2)))

			// Exercise intent handling.
			txn1ID := uuid.MakeV4()
			txn1 := roachpb.Transaction{
				TxnMeta: enginepb.TxnMeta{
					Key:            testKey1,
					ID:             txn1ID,
					Epoch:          1,
					WriteTimestamp: ts4,
				},
				ReadTimestamp: ts4,
			}
			txn1Val := roachpb.Value{RawBytes: testValue4}
			if err := MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.ReadTimestamp, txn1Val, &txn1); err != nil {
				t.Fatal(err)
			}
			txn2ID := uuid.MakeV4()
			txn2 := roachpb.Transaction{
				TxnMeta: enginepb.TxnMeta{
					Key:            testKey2,
					ID:             txn2ID,
					Epoch:          1,
					WriteTimestamp: ts4,
				},
				ReadTimestamp: ts4,
			}
			txn2Val := roachpb.Value{RawBytes: testValue4}
			if err := MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.ReadTimestamp, txn2Val, &txn2); err != nil {
				t.Fatal(err)
			}
			// Both keys now carry an intent at ts4, so any scan whose time range
			// includes ts4 is expected to fail.
			t.Run("intents",
				iterateExpectErr(e, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, latest, "conflicting intents"))
			t.Run("intents",
				iterateExpectErr(e, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, latest, "conflicting intents"))
			t.Run("intents",
				iterateExpectErr(e, keyMin, keyMax, tsMin, ts4, latest, "conflicting intents"))
			// Intents above the upper time bound or beneath the lower time bound must
			// be ignored (#28358). Note that the lower time bound is exclusive while
			// the upper time bound is inclusive.
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, tsMin, ts3, latest, kvs(kv1_3Deleted, kv2_2_2)))
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, ts4, tsMax, latest, kvs()))
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, ts4.Next(), tsMax, latest, kvs()))

			// Resolve the intents: txn1's as COMMITTED, txn2's as ABORTED. The
			// final check expects txn1's value at ts4 and no ts4 entry for
			// testKey2.
			intent1 := roachpb.MakeLockUpdate(&txn1, roachpb.Span{Key: testKey1})
			intent1.Status = roachpb.COMMITTED
			if _, err := MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
				t.Fatal(err)
			}
			intent2 := roachpb.MakeLockUpdate(&txn2, roachpb.Span{Key: testKey2})
			intent2.Status = roachpb.ABORTED
			if _, err := MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
				t.Fatal(err)
			}
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMax, latest, kvs(kv1_4_4, kv2_2_2)))
		})
	}

	// Second pass: the same scenario with revisions=all, so the expected
	// results list several versions per key.
	for _, engineImpl := range mvccEngineImpls {
		t.Run(engineImpl.name, func(t *testing.T) {
			e := engineImpl.create()
			defer e.Close()

			t.Run("empty", assertEqualKVs(e, keyMin, keyMax, tsMin, ts3, all, nil))

			for _, kv := range kvs(kv1_1_1, kv1_2_2, kv2_2_2) {
				v := roachpb.Value{RawBytes: kv.Value}
				if err := MVCCPut(ctx, e, nil, kv.Key.Key, kv.Key.Timestamp, v, nil); err != nil {
					t.Fatal(err)
				}
			}

			// Exercise time ranges.
			t.Run("ts (0-0]", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMin, all, nil))
			t.Run("ts (0-1]", assertEqualKVs(e, keyMin, keyMax, tsMin, ts1, all, kvs(kv1_1_1)))
			t.Run("ts (0-∞]", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMax, all, kvs(kv1_2_2, kv1_1_1, kv2_2_2)))
			t.Run("ts (1-1]", assertEqualKVs(e, keyMin, keyMax, ts1, ts1, all, nil))
			t.Run("ts (1-2]", assertEqualKVs(e, keyMin, keyMax, ts1, ts2, all, kvs(kv1_2_2, kv2_2_2)))
			t.Run("ts (2-2]", assertEqualKVs(e, keyMin, keyMax, ts2, ts2, all, nil))

			// Exercise key ranges.
			t.Run("kv [1-1)", assertEqualKVs(e, testKey1, testKey1, tsMin, tsMax, all, nil))
			t.Run("kv [1-2)", assertEqualKVs(e, testKey1, testKey2, tsMin, tsMax, all, kvs(kv1_2_2, kv1_1_1)))

			// Exercise deletion.
			if err := MVCCDelete(ctx, e, nil, testKey1, ts3, nil); err != nil {
				t.Fatal(err)
			}
			t.Run("del", assertEqualKVs(e, keyMin, keyMax, ts1, tsMax, all, kvs(kv1_3Deleted, kv1_2_2, kv2_2_2)))

			// Exercise intent handling.
			txn1ID := uuid.MakeV4()
			txn1 := roachpb.Transaction{
				TxnMeta: enginepb.TxnMeta{
					Key:            testKey1,
					ID:             txn1ID,
					Epoch:          1,
					WriteTimestamp: ts4,
				},
				ReadTimestamp: ts4,
			}
			txn1Val := roachpb.Value{RawBytes: testValue4}
			if err := MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.ReadTimestamp, txn1Val, &txn1); err != nil {
				t.Fatal(err)
			}
			txn2ID := uuid.MakeV4()
			txn2 := roachpb.Transaction{
				TxnMeta: enginepb.TxnMeta{
					Key:            testKey2,
					ID:             txn2ID,
					Epoch:          1,
					WriteTimestamp: ts4,
				},
				ReadTimestamp: ts4,
			}
			txn2Val := roachpb.Value{RawBytes: testValue4}
			if err := MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.ReadTimestamp, txn2Val, &txn2); err != nil {
				t.Fatal(err)
			}
			// Both keys now carry an intent at ts4, so any scan whose time range
			// includes ts4 is expected to fail.
			t.Run("intents",
				iterateExpectErr(e, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, all, "conflicting intents"))
			t.Run("intents",
				iterateExpectErr(e, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, all, "conflicting intents"))
			t.Run("intents",
				iterateExpectErr(e, keyMin, keyMax, tsMin, ts4, all, "conflicting intents"))
			// Intents above the upper time bound or beneath the lower time bound must
			// be ignored (#28358). Note that the lower time bound is exclusive while
			// the upper time bound is inclusive.
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, tsMin, ts3, all, kvs(kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, ts4, tsMax, all, kvs()))
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, ts4.Next(), tsMax, all, kvs()))

			// Resolve the intents: txn1's as COMMITTED, txn2's as ABORTED.
			intent1 := roachpb.MakeLockUpdate(&txn1, roachpb.Span{Key: testKey1})
			intent1.Status = roachpb.COMMITTED
			if _, err := MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
				t.Fatal(err)
			}
			intent2 := roachpb.MakeLockUpdate(&txn2, roachpb.Span{Key: testKey2})
			intent2.Status = roachpb.ABORTED
			if _, err := MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
				t.Fatal(err)
			}
			t.Run("intents", assertEqualKVs(e, keyMin, keyMax, tsMin, tsMax, all, kvs(kv1_4_4, kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
		})
	}
}
   398  
   399  func slurpKVsInTimeRange(
   400  	reader Reader, prefix roachpb.Key, startTime, endTime hlc.Timestamp,
   401  ) ([]MVCCKeyValue, error) {
   402  	endKey := prefix.PrefixEnd()
   403  	iter := NewMVCCIncrementalIterator(reader, MVCCIncrementalIterOptions{
   404  		IterOptions: IterOptions{
   405  			UpperBound: endKey,
   406  		},
   407  		StartTime: startTime,
   408  		EndTime:   endTime,
   409  	})
   410  	defer iter.Close()
   411  	var kvs []MVCCKeyValue
   412  	for iter.SeekGE(MakeMVCCMetadataKey(prefix)); ; iter.Next() {
   413  		if ok, err := iter.Valid(); err != nil {
   414  			return nil, err
   415  		} else if !ok || iter.UnsafeKey().Key.Compare(endKey) >= 0 {
   416  			break
   417  		}
   418  		kvs = append(kvs, MVCCKeyValue{Key: iter.Key(), Value: iter.Value()})
   419  	}
   420  	return kvs, nil
   421  }
   422  
   423  // TestMVCCIncrementalIteratorIntentRewrittenConcurrently verifies that the
   424  // workaround in MVCCIncrementalIterator to double-check for deleted intents
   425  // properly handles cases where an intent originally in a time-bound iterator's
   426  // time range is rewritten at a timestamp outside of its time range.
   427  func TestMVCCIncrementalIteratorIntentRewrittenConcurrently(t *testing.T) {
   428  	defer leaktest.AfterTest(t)()
   429  
   430  	for _, engineImpl := range mvccEngineImpls {
   431  		t.Run(engineImpl.name, func(t *testing.T) {
   432  			e := engineImpl.create()
   433  			defer e.Close()
   434  
   435  			// Create a DB containing a single intent.
   436  			ctx := context.Background()
   437  
   438  			kA := roachpb.Key("kA")
   439  			vA1 := roachpb.MakeValueFromString("vA1")
   440  			vA2 := roachpb.MakeValueFromString("vA2")
   441  			ts0 := hlc.Timestamp{WallTime: 0}
   442  			ts1 := hlc.Timestamp{WallTime: 1}
   443  			ts2 := hlc.Timestamp{WallTime: 2}
   444  			ts3 := hlc.Timestamp{WallTime: 3}
   445  			txn := &roachpb.Transaction{
   446  				TxnMeta: enginepb.TxnMeta{
   447  					Key:            roachpb.Key("b"),
   448  					ID:             uuid.MakeV4(),
   449  					Epoch:          1,
   450  					WriteTimestamp: ts1,
   451  					Sequence:       1,
   452  				},
   453  				ReadTimestamp: ts1,
   454  			}
   455  			if err := MVCCPut(ctx, e, nil, kA, ts1, vA1, txn); err != nil {
   456  				t.Fatal(err)
   457  			}
   458  
   459  			// Concurrently iterate over the intent using a time-bound iterator and move
   460  			// the intent out of the time-bound iterator's time range by writing to it
   461  			// again at a higher timestamp.
   462  			g, _ := errgroup.WithContext(ctx)
   463  			g.Go(func() error {
   464  				// Re-write the intent with a higher timestamp.
   465  				txn.WriteTimestamp = ts3
   466  				txn.Sequence = 2
   467  				return MVCCPut(ctx, e, nil, kA, ts1, vA2, txn)
   468  			})
   469  			g.Go(func() error {
   470  				// Iterate with a time range that includes the initial intent but does
   471  				// not include the new intent.
   472  				kvs, err := slurpKVsInTimeRange(e, kA, ts0, ts2)
   473  
   474  				// There are two permissible outcomes from the scan. If the iteration
   475  				// wins the race with the put that moves the intent then it should
   476  				// observe the intent and return a write intent error. If the iteration
   477  				// loses the race with the put that moves the intent then it should
   478  				// observe and return nothing because there will be no committed or
   479  				// provisional keys in its time range.
   480  				if err != nil {
   481  					if !testutils.IsError(err, `conflicting intents on "kA"`) {
   482  						return err
   483  					}
   484  				} else {
   485  					if len(kvs) != 0 {
   486  						return errors.Errorf(`unexpected kvs: %v`, kvs)
   487  					}
   488  				}
   489  				return nil
   490  			})
   491  			if err := g.Wait(); err != nil {
   492  				t.Fatal(err)
   493  			}
   494  		})
   495  	}
   496  }
   497  
   498  // TestMVCCIncrementalIteratorIntentDeletion checks a workaround in
   499  // MVCCIncrementalIterator for a bug in time-bound iterators, where an intent
   500  // has been deleted, but the time-bound iterator doesn't see the deletion.
   501  func TestMVCCIncrementalIteratorIntentDeletion(t *testing.T) {
   502  	defer leaktest.AfterTest(t)()
   503  
   504  	txn := func(key roachpb.Key, ts hlc.Timestamp) *roachpb.Transaction {
   505  		return &roachpb.Transaction{
   506  			TxnMeta: enginepb.TxnMeta{
   507  				Key:            key,
   508  				ID:             uuid.MakeV4(),
   509  				Epoch:          1,
   510  				WriteTimestamp: ts,
   511  			},
   512  			ReadTimestamp: ts,
   513  		}
   514  	}
   515  	intent := func(txn *roachpb.Transaction) roachpb.LockUpdate {
   516  		intent := roachpb.MakeLockUpdate(txn, roachpb.Span{Key: txn.Key})
   517  		intent.Status = roachpb.COMMITTED
   518  		return intent
   519  	}
   520  
   521  	ctx := context.Background()
   522  	kA := roachpb.Key("kA")
   523  	vA1 := roachpb.MakeValueFromString("vA1")
   524  	vA2 := roachpb.MakeValueFromString("vA2")
   525  	vA3 := roachpb.MakeValueFromString("vA3")
   526  	kB := roachpb.Key("kB")
   527  	vB1 := roachpb.MakeValueFromString("vB1")
   528  	kC := roachpb.Key("kC")
   529  	vC1 := roachpb.MakeValueFromString("vC1")
   530  	ts0 := hlc.Timestamp{WallTime: 0}
   531  	ts1 := hlc.Timestamp{WallTime: 1}
   532  	ts2 := hlc.Timestamp{WallTime: 2}
   533  	ts3 := hlc.Timestamp{WallTime: 3}
   534  	txnA1 := txn(kA, ts1)
   535  	txnA3 := txn(kA, ts3)
   536  	txnB1 := txn(kB, ts1)
   537  	txnC1 := txn(kC, ts1)
   538  
   539  	db := NewInMem(ctx, DefaultStorageEngine, roachpb.Attributes{}, 10<<20)
   540  	defer db.Close()
   541  
   542  	// Set up two sstables very specifically:
   543  	//
   544  	// sst1 (time-bound metadata ts1->ts1)
   545  	// kA -> (intent)
   546  	// kA:1 -> vA1
   547  	// kB -> (intent)
   548  	// kB:1 -> vB1
   549  	// kC -> (intent)
   550  	// kC:1 -> vC1
   551  	//
   552  	// sst2 (time-bound metadata ts2->ts3) the intent deletions are for the
   553  	// intents at ts1, but there's no way know that when constructing the
   554  	// metadata (hence the time-bound iterator bug)
   555  	// kA -> (intent) [NB this overwrites the intent deletion]
   556  	// kA:3 -> vA3
   557  	// kA:2 -> vA2
   558  	// kB -> (intent deletion)
   559  	require.NoError(t, MVCCPut(ctx, db, nil, kA, txnA1.ReadTimestamp, vA1, txnA1))
   560  	require.NoError(t, MVCCPut(ctx, db, nil, kB, txnB1.ReadTimestamp, vB1, txnB1))
   561  	require.NoError(t, MVCCPut(ctx, db, nil, kC, txnC1.ReadTimestamp, vC1, txnC1))
   562  	require.NoError(t, db.Flush())
   563  	require.NoError(t, db.Compact())
   564  	_, err := MVCCResolveWriteIntent(ctx, db, nil, intent(txnA1))
   565  	require.NoError(t, err)
   566  	_, err = MVCCResolveWriteIntent(ctx, db, nil, intent(txnB1))
   567  	require.NoError(t, err)
   568  	require.NoError(t, MVCCPut(ctx, db, nil, kA, ts2, vA2, nil))
   569  	require.NoError(t, MVCCPut(ctx, db, nil, kA, txnA3.WriteTimestamp, vA3, txnA3))
   570  	require.NoError(t, db.Flush())
   571  
   572  	if rocks, ok := db.(*RocksDB); ok {
   573  		// Double-check that we've created the SSTs we intended to.
   574  		userProps, err := rocks.GetUserProperties()
   575  		require.NoError(t, err)
   576  		require.Len(t, userProps.Sst, 2)
   577  		require.Equal(t, userProps.Sst[0].TsMin, &ts1)
   578  		require.Equal(t, userProps.Sst[0].TsMax, &ts1)
   579  		require.Equal(t, userProps.Sst[1].TsMin, &ts2)
   580  		require.Equal(t, userProps.Sst[1].TsMax, &ts3)
   581  	}
   582  
   583  	// The kA ts1 intent has been resolved. There's now a new intent on kA, but
   584  	// the timestamp (ts3) is too new so it should be ignored.
   585  	kvs, err := slurpKVsInTimeRange(db, kA, ts0, ts1)
   586  	require.NoError(t, err)
   587  	require.Equal(t, []MVCCKeyValue{
   588  		{Key: MVCCKey{Key: kA, Timestamp: ts1}, Value: vA1.RawBytes},
   589  	}, kvs)
   590  	// kA has a value at ts2. Again the intent is too new (ts3), so ignore.
   591  	kvs, err = slurpKVsInTimeRange(db, kA, ts0, ts2)
   592  	require.NoError(t, err)
   593  	require.Equal(t, []MVCCKeyValue{
   594  		{Key: MVCCKey{Key: kA, Timestamp: ts2}, Value: vA2.RawBytes},
   595  		{Key: MVCCKey{Key: kA, Timestamp: ts1}, Value: vA1.RawBytes},
   596  	}, kvs)
   597  	// At ts3, we should see the new intent
   598  	_, err = slurpKVsInTimeRange(db, kA, ts0, ts3)
   599  	require.EqualError(t, err, `conflicting intents on "kA"`)
   600  
   601  	// Similar to the kA ts1 check, but there is no newer intent. We expect to
   602  	// pick up the intent deletion and it should cancel out the intent, leaving
   603  	// only the value at ts1.
   604  	kvs, err = slurpKVsInTimeRange(db, kB, ts0, ts1)
   605  	require.NoError(t, err)
   606  	require.Equal(t, []MVCCKeyValue{
   607  		{Key: MVCCKey{Key: kB, Timestamp: ts1}, Value: vB1.RawBytes},
   608  	}, kvs)
   609  
   610  	// Sanity check that we see the still unresolved intent for kC ts1.
   611  	_, err = slurpKVsInTimeRange(db, kC, ts0, ts1)
   612  	require.EqualError(t, err, `conflicting intents on "kC"`)
   613  }
   614  
   615  func TestMVCCIncrementalIteratorIntentStraddlesSStables(t *testing.T) {
   616  	defer leaktest.AfterTest(t)()
   617  
   618  	// Create a DB containing 2 keys, a and b, where b has an intent. We use the
   619  	// regular MVCCPut operation to generate these keys, which we'll later be
   620  	// copying into manually created sstables.
   621  	ctx := context.Background()
   622  	db1 := NewInMem(ctx, DefaultStorageEngine, roachpb.Attributes{}, 10<<20 /* 10 MB */)
   623  	defer db1.Close()
   624  
   625  	put := func(key, value string, ts int64, txn *roachpb.Transaction) {
   626  		v := roachpb.MakeValueFromString(value)
   627  		if err := MVCCPut(
   628  			ctx, db1, nil, roachpb.Key(key), hlc.Timestamp{WallTime: ts}, v, txn,
   629  		); err != nil {
   630  			t.Fatal(err)
   631  		}
   632  	}
   633  
   634  	put("a", "a value", 1, nil)
   635  	put("b", "b value", 2, &roachpb.Transaction{
   636  		TxnMeta: enginepb.TxnMeta{
   637  			Key:            roachpb.Key("b"),
   638  			ID:             uuid.MakeV4(),
   639  			Epoch:          1,
   640  			WriteTimestamp: hlc.Timestamp{WallTime: 2},
   641  		},
   642  		ReadTimestamp: hlc.Timestamp{WallTime: 2},
   643  	})
   644  
   645  	// Create a second DB in which we'll create a specific SSTable structure: the
   646  	// first SSTable contains 2 KVs where the first is a regular versioned key
   647  	// and the second is the MVCC metadata entry (i.e. an intent). The next
   648  	// SSTable contains the provisional value for the intent. The effect is that
   649  	// the metadata entry is separated from the entry it is metadata for.
   650  	//
   651  	//   SSTable 1:
   652  	//     a@1
   653  	//     b@<meta>
   654  	//
   655  	//   SSTable 2:
   656  	//     b@2
   657  	db2 := NewInMem(ctx, DefaultStorageEngine, roachpb.Attributes{}, 10<<20 /* 10 MB */)
   658  	defer db2.Close()
   659  
   660  	ingest := func(it Iterator, count int) {
   661  		sst, err := MakeRocksDBSstFileWriter()
   662  		if err != nil {
   663  			t.Fatal(err)
   664  		}
   665  		defer sst.Close()
   666  
   667  		for i := 0; i < count; i++ {
   668  			ok, err := it.Valid()
   669  			if err != nil {
   670  				t.Fatal(err)
   671  			}
   672  			if !ok {
   673  				t.Fatal("expected key")
   674  			}
   675  			if err := sst.Put(it.Key(), it.Value()); err != nil {
   676  				t.Fatal(err)
   677  			}
   678  			it.Next()
   679  		}
   680  		sstContents, err := sst.Finish()
   681  		if err != nil {
   682  			t.Fatal(err)
   683  		}
   684  		if err := db2.WriteFile(`ingest`, sstContents); err != nil {
   685  			t.Fatal(err)
   686  		}
   687  		if err := db2.IngestExternalFiles(ctx, []string{`ingest`}); err != nil {
   688  			t.Fatal(err)
   689  		}
   690  	}
   691  
   692  	{
   693  		// Iterate over the entries in the first DB, ingesting them into SSTables
   694  		// in the second DB.
   695  		it := db1.NewIterator(IterOptions{
   696  			UpperBound: keys.MaxKey,
   697  		})
   698  		defer it.Close()
   699  		it.SeekGE(MVCCKey{Key: keys.MinKey})
   700  		ingest(it, 2)
   701  		ingest(it, 1)
   702  	}
   703  
   704  	{
   705  		// Use an incremental iterator to simulate an incremental backup from (1,
   706  		// 2]. Note that incremental iterators are exclusive on the start time and
   707  		// inclusive on the end time. The expectation is that we'll see a write
   708  		// intent error.
   709  		it := NewMVCCIncrementalIterator(db2, MVCCIncrementalIterOptions{
   710  			IterOptions: IterOptions{UpperBound: keys.MaxKey},
   711  			StartTime:   hlc.Timestamp{WallTime: 1},
   712  			EndTime:     hlc.Timestamp{WallTime: 2},
   713  		})
   714  		defer it.Close()
   715  		for it.SeekGE(MVCCKey{Key: keys.MinKey}); ; it.Next() {
   716  			ok, err := it.Valid()
   717  			if err != nil {
   718  				if errors.HasType(err, (*roachpb.WriteIntentError)(nil)) {
   719  					// This is the write intent error we were expecting.
   720  					return
   721  				}
   722  				t.Fatalf("%T: %s", err, err)
   723  			}
   724  			if !ok {
   725  				break
   726  			}
   727  		}
   728  		t.Fatalf("expected write intent error, but found success")
   729  	}
   730  }
   731  
   732  func TestMVCCIterateTimeBound(t *testing.T) {
   733  	defer leaktest.AfterTest(t)()
   734  
   735  	dir, cleanupFn := testutils.TempDir(t)
   736  	defer cleanupFn()
   737  
   738  	const numKeys = 1000
   739  	const numBatches = 10
   740  	const batchTimeSpan = 10
   741  	const valueSize = 8
   742  
   743  	eng, err := loadTestData(filepath.Join(dir, "mvcc_data"),
   744  		numKeys, numBatches, batchTimeSpan, valueSize)
   745  	if err != nil {
   746  		t.Fatal(err)
   747  	}
   748  	defer eng.Close()
   749  
   750  	for _, testCase := range []struct {
   751  		start hlc.Timestamp
   752  		end   hlc.Timestamp
   753  	}{
   754  		// entire time range
   755  		{hlc.Timestamp{WallTime: 0, Logical: 0}, hlc.Timestamp{WallTime: 110, Logical: 0}},
   756  		// one SST
   757  		{hlc.Timestamp{WallTime: 10, Logical: 0}, hlc.Timestamp{WallTime: 19, Logical: 0}},
   758  		// one SST, plus the min of the following SST
   759  		{hlc.Timestamp{WallTime: 10, Logical: 0}, hlc.Timestamp{WallTime: 20, Logical: 0}},
   760  		// one SST, plus the max of the preceding SST
   761  		{hlc.Timestamp{WallTime: 9, Logical: 0}, hlc.Timestamp{WallTime: 19, Logical: 0}},
   762  		// one SST, plus the min of the following and the max of the preceding SST
   763  		{hlc.Timestamp{WallTime: 9, Logical: 0}, hlc.Timestamp{WallTime: 21, Logical: 0}},
   764  		// one SST, not min or max
   765  		{hlc.Timestamp{WallTime: 17, Logical: 0}, hlc.Timestamp{WallTime: 18, Logical: 0}},
   766  		// one SST's max
   767  		{hlc.Timestamp{WallTime: 18, Logical: 0}, hlc.Timestamp{WallTime: 19, Logical: 0}},
   768  		// one SST's min
   769  		{hlc.Timestamp{WallTime: 19, Logical: 0}, hlc.Timestamp{WallTime: 20, Logical: 0}},
   770  		// random endpoints
   771  		{hlc.Timestamp{WallTime: 32, Logical: 0}, hlc.Timestamp{WallTime: 78, Logical: 0}},
   772  	} {
   773  		t.Run(fmt.Sprintf("%s-%s", testCase.start, testCase.end), func(t *testing.T) {
   774  			defer leaktest.AfterTest(t)()
   775  
   776  			var expectedKVs []MVCCKeyValue
   777  			iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
   778  			defer iter.Close()
   779  			iter.SeekGE(MVCCKey{})
   780  			for {
   781  				ok, err := iter.Valid()
   782  				if err != nil {
   783  					t.Fatal(err)
   784  				} else if !ok {
   785  					break
   786  				}
   787  				ts := iter.Key().Timestamp
   788  				if (ts.Less(testCase.end) || testCase.end == ts) && testCase.start.Less(ts) {
   789  					expectedKVs = append(expectedKVs, MVCCKeyValue{Key: iter.Key(), Value: iter.Value()})
   790  				}
   791  				iter.Next()
   792  			}
   793  			if len(expectedKVs) < 1 {
   794  				t.Fatalf("source of truth had no expected KVs; likely a bug in the test itself")
   795  			}
   796  
   797  			assertEqualKVs(eng, keys.MinKey, keys.MaxKey, testCase.start, testCase.end, latest, expectedKVs)(t)
   798  		})
   799  	}
   800  }