github.com/cockroachdb/pebble@v1.1.2/internal/rangekey/coalesce_test.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package rangekey 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "math" 12 "math/rand" 13 "strconv" 14 "strings" 15 "testing" 16 "time" 17 18 "github.com/cockroachdb/datadriven" 19 "github.com/cockroachdb/pebble/internal/base" 20 "github.com/cockroachdb/pebble/internal/keyspan" 21 "github.com/cockroachdb/pebble/internal/testkeys" 22 "github.com/pmezard/go-difflib/difflib" 23 "github.com/stretchr/testify/require" 24 ) 25 26 func TestCoalesce(t *testing.T) { 27 var buf bytes.Buffer 28 eq := testkeys.Comparer.Equal 29 cmp := testkeys.Comparer.Compare 30 31 datadriven.RunTest(t, "testdata/coalesce", func(t *testing.T, td *datadriven.TestData) string { 32 switch td.Cmd { 33 case "coalesce": 34 buf.Reset() 35 span := keyspan.ParseSpan(td.Input) 36 coalesced := keyspan.Span{ 37 Start: span.Start, 38 End: span.End, 39 } 40 if err := Coalesce(cmp, eq, span.Keys, &coalesced.Keys); err != nil { 41 return err.Error() 42 } 43 fmt.Fprintln(&buf, coalesced) 44 return buf.String() 45 default: 46 return fmt.Sprintf("unrecognized command %q", td.Cmd) 47 } 48 }) 49 } 50 51 func TestIter(t *testing.T) { 52 eq := testkeys.Comparer.Equal 53 cmp := testkeys.Comparer.Compare 54 var iter keyspan.MergingIter 55 var buf bytes.Buffer 56 57 datadriven.RunTest(t, "testdata/iter", func(t *testing.T, td *datadriven.TestData) string { 58 buf.Reset() 59 switch td.Cmd { 60 case "define": 61 visibleSeqNum := base.InternalKeySeqNumMax 62 for _, arg := range td.CmdArgs { 63 if arg.Key == "visible-seq-num" { 64 var err error 65 visibleSeqNum, err = strconv.ParseUint(arg.Vals[0], 10, 64) 66 require.NoError(t, err) 67 } 68 } 69 70 var spans []keyspan.Span 71 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 72 for _, line := range lines { 73 spans = append(spans, keyspan.ParseSpan(line)) 74 } 75 transform := keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error { 76 keysBySuffix := keyspan.KeysBySuffix{ 77 Cmp: cmp, 78 Keys: dst.Keys[:0], 79 } 80 if err := coalesce(eq, &keysBySuffix, visibleSeqNum, s.Keys); err != nil { 81 return err 82 } 83 // Update the span with the (potentially reduced) keys slice. coalesce left 84 // the keys in *dst sorted by suffix. Re-sort them by trailer. 85 dst.Keys = keysBySuffix.Keys 86 keyspan.SortKeysByTrailer(&dst.Keys) 87 dst.Start = s.Start 88 dst.End = s.End 89 return nil 90 }) 91 iter.Init(cmp, transform, new(keyspan.MergingBuffers), keyspan.NewIter(cmp, spans)) 92 return "OK" 93 case "iter": 94 buf.Reset() 95 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 96 for _, line := range lines { 97 line = strings.TrimSpace(line) 98 i := strings.IndexByte(line, ' ') 99 iterCmd := line 100 if i > 0 { 101 iterCmd = string(line[:i]) 102 } 103 var s *keyspan.Span 104 switch iterCmd { 105 case "first": 106 s = iter.First() 107 case "last": 108 s = iter.Last() 109 case "next": 110 s = iter.Next() 111 case "prev": 112 s = iter.Prev() 113 case "seek-ge": 114 s = iter.SeekGE([]byte(strings.TrimSpace(line[i:]))) 115 case "seek-lt": 116 s = iter.SeekLT([]byte(strings.TrimSpace(line[i:]))) 117 default: 118 return fmt.Sprintf("unrecognized iter command %q", iterCmd) 119 } 120 require.NoError(t, iter.Error()) 121 fmt.Fprint(&buf, s) 122 if buf.Len() > 0 { 123 fmt.Fprintln(&buf) 124 } 125 } 126 return buf.String() 127 default: 128 return fmt.Sprintf("unrecognized command %q", td.Cmd) 129 } 130 }) 131 } 132 133 func TestDefragmenting(t *testing.T) { 134 cmp := testkeys.Comparer.Compare 135 136 var buf bytes.Buffer 137 var spans []keyspan.Span 138 var hasPrefix bool 139 var prefix []byte 140 datadriven.RunTest(t, "testdata/defragmenting_iter", func(t *testing.T, td *datadriven.TestData) string { 141 buf.Reset() 142 switch td.Cmd { 143 case "define": 144 spans = spans[:0] 145 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 146 for _, line := range lines { 147 spans = append(spans, keyspan.ParseSpan(line)) 148 } 149 return "" 150 case "iter": 151 var userIterCfg UserIteratorConfig 152 iter := userIterCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 153 nil /* lower */, nil, /* upper */ 154 &hasPrefix, &prefix, false /* internalKeys */, new(Buffers), 155 keyspan.NewIter(cmp, spans)) 156 for _, line := range strings.Split(td.Input, "\n") { 157 runIterOp(&buf, iter, line) 158 } 159 return strings.TrimSpace(buf.String()) 160 default: 161 return fmt.Sprintf("unrecognized command %q", td.Cmd) 162 } 163 }) 164 } 165 166 func TestDefragmentingIter_Randomized(t *testing.T) { 167 seed := time.Now().UnixNano() 168 for i := int64(0); i < 100; i++ { 169 testDefragmentingIteRandomizedOnce(t, seed+i) 170 } 171 } 172 173 func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) { 174 const seed = 1648173101214881000 175 testDefragmentingIteRandomizedOnce(t, seed) 176 } 177 178 func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) { 179 cmp := testkeys.Comparer.Compare 180 formatKey := testkeys.Comparer.FormatKey 181 182 rng := rand.New(rand.NewSource(seed)) 183 t.Logf("seed = %d", seed) 184 185 // Use a key space of alphanumeric strings, with a random max length between 186 // 1-2. Repeat keys are more common at the lower max lengths. 187 ks := testkeys.Alpha(rng.Intn(2) + 1) 188 189 // Generate between 1-15 range keys. 190 const maxRangeKeys = 15 191 var original, fragmented []keyspan.Span 192 numRangeKeys := 1 + rng.Intn(maxRangeKeys) 193 for i := 0; i < numRangeKeys; i++ { 194 startIdx := rng.Int63n(ks.Count()) 195 endIdx := rng.Int63n(ks.Count()) 196 for startIdx == endIdx { 197 endIdx = rng.Int63n(ks.Count()) 198 } 199 if startIdx > endIdx { 200 startIdx, endIdx = endIdx, startIdx 201 } 202 203 key := keyspan.Key{ 204 Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet), 205 Value: []byte(fmt.Sprintf("v%d", rng.Intn(3))), 206 } 207 // Generate suffixes 0, 1, 2, or 3 with 0 indicating none. 208 if suffix := rng.Int63n(4); suffix > 0 { 209 key.Suffix = testkeys.Suffix(suffix) 210 } 211 original = append(original, keyspan.Span{ 212 Start: testkeys.Key(ks, startIdx), 213 End: testkeys.Key(ks, endIdx), 214 Keys: []keyspan.Key{key}, 215 }) 216 217 for startIdx < endIdx { 218 width := rng.Int63n(endIdx-startIdx) + 1 219 fragmented = append(fragmented, keyspan.Span{ 220 Start: testkeys.Key(ks, startIdx), 221 End: testkeys.Key(ks, startIdx+width), 222 Keys: []keyspan.Key{key}, 223 }) 224 startIdx += width 225 } 226 } 227 228 // Both the original and the deliberately fragmented spans may contain 229 // overlaps, so we need to sort and fragment them. 230 original = fragment(cmp, formatKey, original) 231 fragmented = fragment(cmp, formatKey, fragmented) 232 233 var referenceCfg, fragmentedCfg UserIteratorConfig 234 referenceIter := referenceCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 235 nil /* lower */, nil, /* upper */ 236 new(bool), new([]byte), false /* internalKeys */, new(Buffers), 237 keyspan.NewIter(cmp, original)) 238 fragmentedIter := fragmentedCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 239 nil /* lower */, nil, /* upper */ 240 new(bool), new([]byte), false /* internalKeys */, new(Buffers), 241 keyspan.NewIter(cmp, fragmented)) 242 243 // Generate 100 random operations and run them against both iterators. 244 const numIterOps = 100 245 type opKind struct { 246 weight int 247 fn func() string 248 } 249 ops := []opKind{ 250 {weight: 2, fn: func() string { return "first" }}, 251 {weight: 2, fn: func() string { return "last" }}, 252 {weight: 50, fn: func() string { return "next" }}, 253 {weight: 50, fn: func() string { return "prev" }}, 254 {weight: 5, fn: func() string { 255 k := testkeys.Key(ks, rng.Int63n(ks.Count())) 256 return fmt.Sprintf("seekge(%s)", k) 257 }}, 258 {weight: 5, fn: func() string { 259 k := testkeys.Key(ks, rng.Int63n(ks.Count())) 260 return fmt.Sprintf("seeklt(%s)", k) 261 }}, 262 } 263 var totalWeight int 264 for _, op := range ops { 265 totalWeight += op.weight 266 } 267 var referenceHistory, fragmentedHistory bytes.Buffer 268 for i := 0; i < numIterOps; i++ { 269 p := rng.Intn(totalWeight) 270 opIndex := 0 271 if i == 0 { 272 // First op is always a First(). 273 } else { 274 for i, op := range ops { 275 if p < op.weight { 276 opIndex = i 277 break 278 } 279 p -= op.weight 280 } 281 } 282 op := ops[opIndex].fn() 283 runIterOp(&referenceHistory, referenceIter, op) 284 runIterOp(&fragmentedHistory, fragmentedIter, op) 285 if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) { 286 t.Fatal(debugContext(cmp, formatKey, original, fragmented, 287 referenceHistory.String(), fragmentedHistory.String())) 288 } 289 } 290 } 291 292 func fragment(cmp base.Compare, formatKey base.FormatKey, spans []keyspan.Span) []keyspan.Span { 293 keyspan.Sort(cmp, spans) 294 var fragments []keyspan.Span 295 f := keyspan.Fragmenter{ 296 Cmp: cmp, 297 Format: formatKey, 298 Emit: func(f keyspan.Span) { 299 fragments = append(fragments, f) 300 }, 301 } 302 for _, s := range spans { 303 f.Add(s) 304 } 305 f.Finish() 306 return fragments 307 } 308 309 func debugContext( 310 cmp base.Compare, 311 formatKey base.FormatKey, 312 original, fragmented []keyspan.Span, 313 refHistory, fragHistory string, 314 ) string { 315 var buf bytes.Buffer 316 fmt.Fprintln(&buf, "Reference:") 317 for _, s := range original { 318 fmt.Fprintln(&buf, s) 319 } 320 fmt.Fprintln(&buf) 321 fmt.Fprintln(&buf, "Fragmented:") 322 for _, s := range fragmented { 323 fmt.Fprintln(&buf, s) 324 } 325 fmt.Fprintln(&buf) 326 fmt.Fprintln(&buf, "\nOperations diff:") 327 diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ 328 A: difflib.SplitLines(refHistory), 329 B: difflib.SplitLines(fragHistory), 330 Context: 5, 331 }) 332 if err != nil { 333 panic(err) 334 } 335 fmt.Fprintln(&buf, diff) 336 return buf.String() 337 } 338 339 var iterDelim = map[rune]bool{',': true, ' ': true, '(': true, ')': true, '"': true} 340 341 func runIterOp(w io.Writer, it keyspan.FragmentIterator, op string) { 342 fields := strings.FieldsFunc(op, func(r rune) bool { return iterDelim[r] }) 343 var s *keyspan.Span 344 switch strings.ToLower(fields[0]) { 345 case "first": 346 s = it.First() 347 case "last": 348 s = it.Last() 349 case "seekge": 350 s = it.SeekGE([]byte(fields[1])) 351 case "seeklt": 352 s = it.SeekLT([]byte(fields[1])) 353 case "next": 354 s = it.Next() 355 case "prev": 356 s = it.Prev() 357 default: 358 panic(fmt.Sprintf("unrecognized iter op %q", fields[0])) 359 } 360 fmt.Fprintf(w, "%-10s", op) 361 if s == nil { 362 fmt.Fprintln(w, ".") 363 return 364 } 365 fmt.Fprintln(w, s) 366 } 367 368 func BenchmarkTransform(b *testing.B) { 369 var bufs Buffers 370 var ui UserIteratorConfig 371 reinit := func() { 372 bufs.PrepareForReuse() 373 _ = ui.Init(testkeys.Comparer, math.MaxUint64, nil, nil, new(bool), nil, true /* internalKeys */, &bufs) 374 } 375 376 for _, shadowing := range []bool{false, true} { 377 b.Run(fmt.Sprintf("shadowing=%t", shadowing), func(b *testing.B) { 378 for n := 1; n <= 128; n *= 2 { 379 b.Run(fmt.Sprintf("keys=%d", n), func(b *testing.B) { 380 rng := rand.New(rand.NewSource(233473048763)) 381 reinit() 382 383 suffixes := make([][]byte, n) 384 for s := range suffixes { 385 if shadowing { 386 suffixes[s] = testkeys.Suffix(int64(rng.Intn(n))) 387 } else { 388 suffixes[s] = testkeys.Suffix(int64(s)) 389 } 390 } 391 rng.Shuffle(len(suffixes), func(i, j int) { 392 suffixes[i], suffixes[j] = suffixes[j], suffixes[i] 393 }) 394 395 var keys []keyspan.Key 396 for k := 0; k < n; k++ { 397 keys = append(keys, keyspan.Key{ 398 Trailer: base.MakeTrailer(uint64(n-k), base.InternalKeyKindRangeKeySet), 399 Suffix: suffixes[k], 400 }) 401 } 402 dst := keyspan.Span{Keys: make([]keyspan.Key, 0, len(keys))} 403 b.ResetTimer() 404 405 for i := 0; i < b.N; i++ { 406 err := ui.Transform(testkeys.Comparer.Compare, keyspan.Span{Keys: keys}, &dst) 407 if err != nil { 408 b.Fatal(err) 409 } 410 dst.Keys = dst.Keys[:0] 411 } 412 }) 413 } 414 }) 415 } 416 }