github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/bulk/sst_batcher_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package bulk_test
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math/rand"
    17  	"reflect"
    18  	"runtime"
    19  	"strings"
    20  	"testing"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/keys"
    24  	"github.com/cockroachdb/cockroach/pkg/kv"
    25  	"github.com/cockroachdb/cockroach/pkg/kv/bulk"
    26  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    27  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    28  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    29  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    30  	"github.com/cockroachdb/cockroach/pkg/storage"
    31  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    32  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    33  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    34  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    35  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    36  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    37  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    38  	"github.com/stretchr/testify/require"
    39  )
    40  
    41  func makeIntTableKVs(numKeys, valueSize, maxRevisions int) []storage.MVCCKeyValue {
    42  	prefix := keys.SystemSQLCodec.IndexPrefix(100, 1)
    43  	kvs := make([]storage.MVCCKeyValue, numKeys)
    44  	r, _ := randutil.NewPseudoRand()
    45  
    46  	var k int
    47  	for i := 0; i < numKeys; {
    48  		k += 1 + rand.Intn(100)
    49  		key := encoding.EncodeVarintAscending(append([]byte{}, prefix...), int64(k))
    50  		buf := make([]byte, valueSize)
    51  		randutil.ReadTestdataBytes(r, buf)
    52  		revisions := 1 + r.Intn(maxRevisions)
    53  
    54  		ts := int64(maxRevisions * 100)
    55  		for j := 0; j < revisions && i < numKeys; j++ {
    56  			ts -= 1 + r.Int63n(99)
    57  			kvs[i].Key.Key = key
    58  			kvs[i].Key.Timestamp.WallTime = ts
    59  			kvs[i].Key.Timestamp.Logical = r.Int31()
    60  			kvs[i].Value = roachpb.MakeValueFromString(string(buf)).RawBytes
    61  			i++
    62  		}
    63  	}
    64  	return kvs
    65  }
    66  
    67  func makeRocksSST(t testing.TB, kvs []storage.MVCCKeyValue) []byte {
    68  	w, err := storage.MakeRocksDBSstFileWriter()
    69  	require.NoError(t, err)
    70  	defer w.Close()
    71  
    72  	for i := range kvs {
    73  		if err := w.Put(kvs[i].Key, kvs[i].Value); err != nil {
    74  			t.Fatal(err)
    75  		}
    76  	}
    77  	sst, err := w.Finish()
    78  	require.NoError(t, err)
    79  	return sst
    80  }
    81  
    82  func TestAddBatched(t *testing.T) {
    83  	defer leaktest.AfterTest(t)()
    84  	t.Run("batch=default", func(t *testing.T) {
    85  		runTestImport(t, 32<<20)
    86  	})
    87  	t.Run("batch=1", func(t *testing.T) {
    88  		runTestImport(t, 1)
    89  	})
    90  }
    91  
// runTestImport starts a test server and, for every test case below, runs a
// subtest that:
//
//  1. splits the subtest's own key prefix at keys split1 and split2,
//  2. creates a bulk adder whose MinBufferSize and SSTSize are both
//     batchSizeValue, backed by a range cache that only knows the range
//     containing key 0,
//  3. adds each batch of keys and flushes after every batch,
//  4. checks the number of "spanning range bounds" retries recorded in the
//     trace against the expected count, and
//  5. scans the keyspace and requires the result to equal exactly what was
//     added.
func runTestImport(t *testing.T, batchSizeValue int64) {

	ctx := context.Background()
	s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(ctx)

	// SSTSize is configured as a function, so wrap the fixed value in one.
	batchSize := func() int64 { return batchSizeValue }

	const split1, split2 = 3, 5

	// Each test case consists of some number of batches of keys, represented as
	// ints [0, 8). Splits are at 3 and 5.
	for i, testCase := range [][][]int{
		// Simple cases, no spanning splits, try first, last, middle, etc in each.
		// r1
		{{0}},
		{{1}},
		{{2}},
		{{0, 1, 2}},
		{{0}, {1}, {2}},

		// r2
		{{3}},
		{{4}},
		{{3, 4}},
		{{3}, {4}},

		// r3
		{{5}},
		{{5, 6, 7}},
		{{6}},

		// batches exactly matching spans.
		{{0, 1, 2}, {3, 4}, {5, 6, 7}},

		// every key, in its own batch.
		{{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}},

		// every key in one big batch.
		{{0, 1, 2, 3, 4, 5, 6, 7}},

		// Look for off-by-ones on and around the splits.
		{{2, 3}},
		{{1, 3}},
		{{2, 4}},
		{{1, 4}},
		{{1, 5}},
		{{2, 5}},

		// Mixture of split-aligned and non-aligned batches.
		{{1}, {5}, {6}},
		{{1, 2, 3}, {4, 5}, {6, 7}},
		{{0}, {2, 3, 5}, {7}},
		{{0, 4}, {5, 7}},
		{{0, 3}, {4}},
	} {
		t.Run(fmt.Sprintf("%d-%v", i, testCase), func(t *testing.T) {
			// Each test case gets its own prefix (derived from its index) so
			// that cases sharing the one server do not see each other's keys.
			prefix := keys.SystemSQLCodec.IndexPrefix(uint32(100+i), 1)
			// key maps a small test integer to the string key "k<i>" under
			// this test case's prefix.
			key := func(i int) roachpb.Key {
				return encoding.EncodeStringAscending(append([]byte{}, prefix...), fmt.Sprintf("k%d", i))
			}

			if err := kvDB.AdminSplit(ctx, key(split1), key(split1), hlc.MaxTimestamp /* expirationTime */); err != nil {
				t.Fatal(err)
			}
			if err := kvDB.AdminSplit(ctx, key(split2), key(split2), hlc.MaxTimestamp /* expirationTime */); err != nil {
				t.Fatal(err)
			}

			// We want to make sure our range-aware batching knows about one of our
			// splits to exercise that codepath, but we also want to make sure we
			// still handle an unexpected split, so we make our own range cache and
			// only populate it with one of our two splits.
			mockCache := kvcoord.NewRangeDescriptorCache(s.ClusterSettings(), nil, func() int64 { return 2 << 10 }, s.Stopper())
			addr, err := keys.Addr(key(0))
			if err != nil {
				t.Fatal(err)
			}
			// Look up the descriptor of the range containing key(0) through the
			// server's real cache and copy just that one into the mock cache.
			r, _, err := s.DistSenderI().(*kvcoord.DistSender).RangeDescriptorCache().LookupRangeDescriptorWithEvictionToken(
				ctx, addr, nil, false)
			if err != nil {
				t.Fatal(err)
			}
			mockCache.InsertRangeDescriptors(ctx, *r)

			ts := hlc.Timestamp{WallTime: 100}
			b, err := bulk.MakeBulkAdder(
				ctx, kvDB, mockCache, s.ClusterSettings(), ts, kvserverbase.BulkAdderOptions{MinBufferSize: batchSize(), SSTSize: batchSize}, nil, /* bulkMon */
			)
			if err != nil {
				t.Fatal(err)
			}

			defer b.Close(ctx)

			// expected accumulates every KV added, in the order it was added,
			// for comparison against the final scan.
			var expected []kv.KeyValue

			// Since the batcher automatically handles any retries due to spanning
			// range bounds internally, it can be difficult to observe from outside
			// whether we correctly split on the first attempt to avoid those
			// retries. However, we log an event when forced to retry (in case we
			// need to debug slow requests or similar), so we can inspect the trace
			// in the test to determine if requests required the expected number of
			// retries.

			addCtx, getRec, cancel := tracing.ContextWithRecordingSpan(ctx, "add")
			defer cancel()
			expectedSplitRetries := 0
			for _, batch := range testCase {
				for idx, x := range batch {
					k := key(x)
					// If our adder is batching multiple keys and we've previously
					// added a key in [split1, split2) and are now adding one at or
					// after split2, then we should expect this batch to span split2
					// and thus cause a retry.
					if batchSize() > 1 && idx > 0 && batch[idx-1] < split2 && batch[idx-1] >= split1 && batch[idx] >= split2 {
						expectedSplitRetries = 1
					}
					v := roachpb.MakeValueFromString(fmt.Sprintf("value-%d", x))
					v.Timestamp = ts
					v.InitChecksum(k)
					t.Logf("adding: %v", k)

					if err := b.Add(addCtx, k, v.RawBytes); err != nil {
						t.Fatal(err)
					}
					expected = append(expected, kv.KeyValue{Key: k, Value: &v})
				}
				// Flush after every batch so that batch boundaries in the test
				// case are reflected in what the adder sends.
				if err := b.Flush(addCtx); err != nil {
					t.Fatal(err)
				}
			}
			// Count the retry events recorded in the trace by matching on the
			// logged message text.
			var splitRetries int
			for _, rec := range getRec() {
				for _, l := range rec.Logs {
					for _, line := range l.Fields {
						if strings.Contains(line.Value, "SSTable cannot be added spanning range bounds") {
							splitRetries++
						}
					}
				}
			}
			if splitRetries != expectedSplitRetries {
				t.Fatalf("expected %d split-caused retries, got %d", expectedSplitRetries, splitRetries)
			}
			cancel()

			added := b.GetSummary()
			t.Logf("Wrote %d total", added.DataSize)

			got, err := kvDB.Scan(ctx, key(0), key(8), 0)
			if err != nil {
				t.Fatalf("%+v", err)
			}

			if !reflect.DeepEqual(got, expected) {
				// Log both sides entry by entry before failing to make the
				// mismatch easier to spot.
				for i := 0; i < len(got) || i < len(expected); i++ {
					if i < len(expected) {
						t.Logf("expected %d\t%v\t%v", i, expected[i].Key, expected[i].Value)
					}
					if i < len(got) {
						t.Logf("got      %d\t%v\t%v", i, got[i].Key, got[i].Value)
					}
				}
				t.Fatalf("got      %+v\nexpected %+v", got, expected)
			}
		})
	}
}
   259  
   260  type mockSender func(span roachpb.Span) error
   261  
   262  func (m mockSender) AddSSTable(
   263  	ctx context.Context,
   264  	begin, end interface{},
   265  	data []byte,
   266  	disallowShadowing bool,
   267  	_ *enginepb.MVCCStats,
   268  	ingestAsWrites bool,
   269  ) error {
   270  	return m(roachpb.Span{Key: begin.(roachpb.Key), EndKey: end.(roachpb.Key)})
   271  }
   272  
   273  func (m mockSender) SplitAndScatter(ctx context.Context, _ roachpb.Key, _ hlc.Timestamp) error {
   274  	return nil
   275  }
   276  
   277  // TestAddBigSpanningSSTWithSplits tests a situation where a large
   278  // spanning SST is being ingested over a span with a lot of splits.
   279  func TestAddBigSpanningSSTWithSplits(t *testing.T) {
   280  	defer leaktest.AfterTest(t)()
   281  
   282  	if testing.Short() {
   283  		t.Skip("this test needs to do a larger SST to see the quadratic mem usage on retries kick in.")
   284  	}
   285  
   286  	const numKeys, valueSize, splitEvery = 500, 5000, 1
   287  
   288  	// Make some KVs and grab [start,end). Generate one extra for exclusive `end`.
   289  	kvs := makeIntTableKVs(numKeys+1, valueSize, 1)
   290  	start, end := kvs[0].Key.Key, kvs[numKeys].Key.Key
   291  	kvs = kvs[:numKeys]
   292  
   293  	// Create a large SST.
   294  	sst := makeRocksSST(t, kvs)
   295  
   296  	var splits []roachpb.Key
   297  	for i := range kvs {
   298  		if i%splitEvery == 0 {
   299  			splits = append(splits, kvs[i].Key.Key)
   300  		}
   301  	}
   302  
   303  	// Keep track of the memory.
   304  	getMem := func() uint64 {
   305  		var stats runtime.MemStats
   306  		runtime.ReadMemStats(&stats)
   307  		return stats.HeapInuse
   308  	}
   309  	var early, late uint64
   310  	var totalAdditionAttempts int
   311  	mock := mockSender(func(span roachpb.Span) error {
   312  		totalAdditionAttempts++
   313  		for i := range splits {
   314  			if span.ContainsKey(splits[i]) && !span.Key.Equal(splits[i]) {
   315  				earlySplit := numKeys / 100
   316  				if i == earlySplit {
   317  					early = getMem()
   318  				} else if i == len(splits)-earlySplit {
   319  					late = getMem()
   320  				}
   321  				return &roachpb.RangeKeyMismatchError{
   322  					MismatchedRange: roachpb.RangeDescriptor{EndKey: roachpb.RKey(splits[i])},
   323  				}
   324  			}
   325  		}
   326  		return nil
   327  	})
   328  
   329  	const kb = 1 << 10
   330  
   331  	t.Logf("Adding %dkb sst spanning %d splits from %v to %v", len(sst)/kb, len(splits), start, end)
   332  	if _, err := bulk.AddSSTable(
   333  		context.Background(), mock, start, end, sst, false /* disallowShadowing */, enginepb.MVCCStats{}, cluster.MakeTestingClusterSettings(),
   334  	); err != nil {
   335  		t.Fatal(err)
   336  	}
   337  	t.Logf("Adding took %d total attempts", totalAdditionAttempts)
   338  	if late > early*8 {
   339  		t.Fatalf("Mem usage grew from %dkb before grew to %dkb later (%.2fx)",
   340  			early/kb, late/kb, float64(late)/float64(early))
   341  	}
   342  }