github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/bulk/sst_batcher_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package bulk_test 12 13 import ( 14 "context" 15 "fmt" 16 "math/rand" 17 "reflect" 18 "runtime" 19 "strings" 20 "testing" 21 22 "github.com/cockroachdb/cockroach/pkg/base" 23 "github.com/cockroachdb/cockroach/pkg/keys" 24 "github.com/cockroachdb/cockroach/pkg/kv" 25 "github.com/cockroachdb/cockroach/pkg/kv/bulk" 26 "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord" 27 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" 28 "github.com/cockroachdb/cockroach/pkg/roachpb" 29 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 30 "github.com/cockroachdb/cockroach/pkg/storage" 31 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 32 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 33 "github.com/cockroachdb/cockroach/pkg/util/encoding" 34 "github.com/cockroachdb/cockroach/pkg/util/hlc" 35 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 36 "github.com/cockroachdb/cockroach/pkg/util/randutil" 37 "github.com/cockroachdb/cockroach/pkg/util/tracing" 38 "github.com/stretchr/testify/require" 39 ) 40 41 func makeIntTableKVs(numKeys, valueSize, maxRevisions int) []storage.MVCCKeyValue { 42 prefix := keys.SystemSQLCodec.IndexPrefix(100, 1) 43 kvs := make([]storage.MVCCKeyValue, numKeys) 44 r, _ := randutil.NewPseudoRand() 45 46 var k int 47 for i := 0; i < numKeys; { 48 k += 1 + rand.Intn(100) 49 key := encoding.EncodeVarintAscending(append([]byte{}, prefix...), int64(k)) 50 buf := make([]byte, valueSize) 51 randutil.ReadTestdataBytes(r, buf) 52 revisions := 1 + r.Intn(maxRevisions) 53 54 ts := int64(maxRevisions * 100) 55 for j := 0; j < revisions && i < numKeys; j++ { 56 ts -= 1 + r.Int63n(99) 57 kvs[i].Key.Key = key 58 kvs[i].Key.Timestamp.WallTime = ts 59 kvs[i].Key.Timestamp.Logical = r.Int31() 60 kvs[i].Value = roachpb.MakeValueFromString(string(buf)).RawBytes 61 i++ 62 } 63 } 64 return kvs 65 } 66 67 func makeRocksSST(t testing.TB, kvs []storage.MVCCKeyValue) []byte { 68 w, err := storage.MakeRocksDBSstFileWriter() 69 require.NoError(t, err) 70 defer w.Close() 71 72 for i := range kvs { 73 if err := w.Put(kvs[i].Key, kvs[i].Value); err != nil { 74 t.Fatal(err) 75 } 76 } 77 sst, err := w.Finish() 78 require.NoError(t, err) 79 return sst 80 } 81 82 func TestAddBatched(t *testing.T) { 83 defer leaktest.AfterTest(t)() 84 t.Run("batch=default", func(t *testing.T) { 85 runTestImport(t, 32<<20) 86 }) 87 t.Run("batch=1", func(t *testing.T) { 88 runTestImport(t, 1) 89 }) 90 } 91 92 func runTestImport(t *testing.T, batchSizeValue int64) { 93 94 ctx := context.Background() 95 s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) 96 defer s.Stopper().Stop(ctx) 97 98 batchSize := func() int64 { return batchSizeValue } 99 100 const split1, split2 = 3, 5 101 102 // Each test case consists of some number of batches of keys, represented as 103 // ints [0, 8). Splits are at 3 and 5. 104 for i, testCase := range [][][]int{ 105 // Simple cases, no spanning splits, try first, last, middle, etc in each. 106 // r1 107 {{0}}, 108 {{1}}, 109 {{2}}, 110 {{0, 1, 2}}, 111 {{0}, {1}, {2}}, 112 113 // r2 114 {{3}}, 115 {{4}}, 116 {{3, 4}}, 117 {{3}, {4}}, 118 119 // r3 120 {{5}}, 121 {{5, 6, 7}}, 122 {{6}}, 123 124 // batches exactly matching spans. 125 {{0, 1, 2}, {3, 4}, {5, 6, 7}}, 126 127 // every key, in its own batch. 128 {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, 129 130 // every key in one big batch. 131 {{0, 1, 2, 3, 4, 5, 6, 7}}, 132 133 // Look for off-by-ones on and around the splits. 134 {{2, 3}}, 135 {{1, 3}}, 136 {{2, 4}}, 137 {{1, 4}}, 138 {{1, 5}}, 139 {{2, 5}}, 140 141 // Mixture of split-aligned and non-aligned batches. 142 {{1}, {5}, {6}}, 143 {{1, 2, 3}, {4, 5}, {6, 7}}, 144 {{0}, {2, 3, 5}, {7}}, 145 {{0, 4}, {5, 7}}, 146 {{0, 3}, {4}}, 147 } { 148 t.Run(fmt.Sprintf("%d-%v", i, testCase), func(t *testing.T) { 149 prefix := keys.SystemSQLCodec.IndexPrefix(uint32(100+i), 1) 150 key := func(i int) roachpb.Key { 151 return encoding.EncodeStringAscending(append([]byte{}, prefix...), fmt.Sprintf("k%d", i)) 152 } 153 154 if err := kvDB.AdminSplit(ctx, key(split1), key(split1), hlc.MaxTimestamp /* expirationTime */); err != nil { 155 t.Fatal(err) 156 } 157 if err := kvDB.AdminSplit(ctx, key(split2), key(split2), hlc.MaxTimestamp /* expirationTime */); err != nil { 158 t.Fatal(err) 159 } 160 161 // We want to make sure our range-aware batching knows about one of our 162 // splits to exercise that codepath, but we also want to make sure we 163 // still handle an unexpected split, so we make our own range cache and 164 // only populate it with one of our two splits. 165 mockCache := kvcoord.NewRangeDescriptorCache(s.ClusterSettings(), nil, func() int64 { return 2 << 10 }, s.Stopper()) 166 addr, err := keys.Addr(key(0)) 167 if err != nil { 168 t.Fatal(err) 169 } 170 r, _, err := s.DistSenderI().(*kvcoord.DistSender).RangeDescriptorCache().LookupRangeDescriptorWithEvictionToken( 171 ctx, addr, nil, false) 172 if err != nil { 173 t.Fatal(err) 174 } 175 mockCache.InsertRangeDescriptors(ctx, *r) 176 177 ts := hlc.Timestamp{WallTime: 100} 178 b, err := bulk.MakeBulkAdder( 179 ctx, kvDB, mockCache, s.ClusterSettings(), ts, kvserverbase.BulkAdderOptions{MinBufferSize: batchSize(), SSTSize: batchSize}, nil, /* bulkMon */ 180 ) 181 if err != nil { 182 t.Fatal(err) 183 } 184 185 defer b.Close(ctx) 186 187 var expected []kv.KeyValue 188 189 // Since the batcher automatically handles any retries due to spanning the 190 // range-bounds internally, it can be difficult to observe from outside if 191 // we correctly split on the first attempt to avoid those retires. 192 // However we log an event when forced to retry (in case we need to debug) 193 // slow requests or something, so we can inspect the trace in the test to 194 // determine if requests required the expected number of retries. 195 196 addCtx, getRec, cancel := tracing.ContextWithRecordingSpan(ctx, "add") 197 defer cancel() 198 expectedSplitRetries := 0 199 for _, batch := range testCase { 200 for idx, x := range batch { 201 k := key(x) 202 // if our adds is batching multiple keys and we've previously added 203 // a key prior to split2 and are now adding one after split2, then we 204 // should expect this batch to span split2 and thus cause a retry. 205 if batchSize() > 1 && idx > 0 && batch[idx-1] < split2 && batch[idx-1] >= split1 && batch[idx] >= split2 { 206 expectedSplitRetries = 1 207 } 208 v := roachpb.MakeValueFromString(fmt.Sprintf("value-%d", x)) 209 v.Timestamp = ts 210 v.InitChecksum(k) 211 t.Logf("adding: %v", k) 212 213 if err := b.Add(addCtx, k, v.RawBytes); err != nil { 214 t.Fatal(err) 215 } 216 expected = append(expected, kv.KeyValue{Key: k, Value: &v}) 217 } 218 if err := b.Flush(addCtx); err != nil { 219 t.Fatal(err) 220 } 221 } 222 var splitRetries int 223 for _, rec := range getRec() { 224 for _, l := range rec.Logs { 225 for _, line := range l.Fields { 226 if strings.Contains(line.Value, "SSTable cannot be added spanning range bounds") { 227 splitRetries++ 228 } 229 } 230 } 231 } 232 if splitRetries != expectedSplitRetries { 233 t.Fatalf("expected %d split-caused retries, got %d", expectedSplitRetries, splitRetries) 234 } 235 cancel() 236 237 added := b.GetSummary() 238 t.Logf("Wrote %d total", added.DataSize) 239 240 got, err := kvDB.Scan(ctx, key(0), key(8), 0) 241 if err != nil { 242 t.Fatalf("%+v", err) 243 } 244 245 if !reflect.DeepEqual(got, expected) { 246 for i := 0; i < len(got) || i < len(expected); i++ { 247 if i < len(expected) { 248 t.Logf("expected %d\t%v\t%v", i, expected[i].Key, expected[i].Value) 249 } 250 if i < len(got) { 251 t.Logf("got %d\t%v\t%v", i, got[i].Key, got[i].Value) 252 } 253 } 254 t.Fatalf("got %+v\nexpected %+v", got, expected) 255 } 256 }) 257 } 258 } 259 260 type mockSender func(span roachpb.Span) error 261 262 func (m mockSender) AddSSTable( 263 ctx context.Context, 264 begin, end interface{}, 265 data []byte, 266 disallowShadowing bool, 267 _ *enginepb.MVCCStats, 268 ingestAsWrites bool, 269 ) error { 270 return m(roachpb.Span{Key: begin.(roachpb.Key), EndKey: end.(roachpb.Key)}) 271 } 272 273 func (m mockSender) SplitAndScatter(ctx context.Context, _ roachpb.Key, _ hlc.Timestamp) error { 274 return nil 275 } 276 277 // TestAddBigSpanningSSTWithSplits tests a situation where a large 278 // spanning SST is being ingested over a span with a lot of splits. 279 func TestAddBigSpanningSSTWithSplits(t *testing.T) { 280 defer leaktest.AfterTest(t)() 281 282 if testing.Short() { 283 t.Skip("this test needs to do a larger SST to see the quadratic mem usage on retries kick in.") 284 } 285 286 const numKeys, valueSize, splitEvery = 500, 5000, 1 287 288 // Make some KVs and grab [start,end). Generate one extra for exclusive `end`. 289 kvs := makeIntTableKVs(numKeys+1, valueSize, 1) 290 start, end := kvs[0].Key.Key, kvs[numKeys].Key.Key 291 kvs = kvs[:numKeys] 292 293 // Create a large SST. 294 sst := makeRocksSST(t, kvs) 295 296 var splits []roachpb.Key 297 for i := range kvs { 298 if i%splitEvery == 0 { 299 splits = append(splits, kvs[i].Key.Key) 300 } 301 } 302 303 // Keep track of the memory. 304 getMem := func() uint64 { 305 var stats runtime.MemStats 306 runtime.ReadMemStats(&stats) 307 return stats.HeapInuse 308 } 309 var early, late uint64 310 var totalAdditionAttempts int 311 mock := mockSender(func(span roachpb.Span) error { 312 totalAdditionAttempts++ 313 for i := range splits { 314 if span.ContainsKey(splits[i]) && !span.Key.Equal(splits[i]) { 315 earlySplit := numKeys / 100 316 if i == earlySplit { 317 early = getMem() 318 } else if i == len(splits)-earlySplit { 319 late = getMem() 320 } 321 return &roachpb.RangeKeyMismatchError{ 322 MismatchedRange: roachpb.RangeDescriptor{EndKey: roachpb.RKey(splits[i])}, 323 } 324 } 325 } 326 return nil 327 }) 328 329 const kb = 1 << 10 330 331 t.Logf("Adding %dkb sst spanning %d splits from %v to %v", len(sst)/kb, len(splits), start, end) 332 if _, err := bulk.AddSSTable( 333 context.Background(), mock, start, end, sst, false /* disallowShadowing */, enginepb.MVCCStats{}, cluster.MakeTestingClusterSettings(), 334 ); err != nil { 335 t.Fatal(err) 336 } 337 t.Logf("Adding took %d total attempts", totalAdditionAttempts) 338 if late > early*8 { 339 t.Fatalf("Mem usage grew from %dkb before grew to %dkb later (%.2fx)", 340 early/kb, late/kb, float64(late)/float64(early)) 341 } 342 }