github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/keyspan/defragment_test.go (about) 1 // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package keyspan 6 7 import ( 8 "bytes" 9 "fmt" 10 "math/rand" 11 "sort" 12 "strings" 13 "testing" 14 "time" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/cockroachdb/pebble/internal/testkeys" 19 "github.com/pmezard/go-difflib/difflib" 20 ) 21 22 func TestDefragmentingIter(t *testing.T) { 23 comparer := testkeys.Comparer 24 cmp := comparer.Compare 25 internalEqual := DefragmentInternal 26 alwaysEqual := DefragmentMethodFunc(func(_ base.Equal, _, _ *Span) bool { return true }) 27 staticReducer := StaticDefragmentReducer 28 collectReducer := func(cur, next []Key) []Key { 29 c := keysBySeqNumKind(append(cur, next...)) 30 sort.Sort(&c) 31 return c 32 } 33 34 var buf bytes.Buffer 35 var spans []Span 36 datadriven.RunTest(t, "testdata/defragmenting_iter", func(t *testing.T, td *datadriven.TestData) string { 37 buf.Reset() 38 switch td.Cmd { 39 case "define": 40 spans = spans[:0] 41 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 42 for _, line := range lines { 43 spans = append(spans, ParseSpan(line)) 44 } 45 return "" 46 case "iter": 47 equal := internalEqual 48 reducer := staticReducer 49 var probes []probe 50 for _, cmdArg := range td.CmdArgs { 51 switch cmd := cmdArg.Key; cmd { 52 case "equal": 53 if len(cmdArg.Vals) != 1 { 54 return fmt.Sprintf("only one equal func expected; got %d", len(cmdArg.Vals)) 55 } 56 switch val := cmdArg.Vals[0]; val { 57 case "internal": 58 equal = internalEqual 59 case "always": 60 equal = alwaysEqual 61 default: 62 return fmt.Sprintf("unknown reducer %s", val) 63 } 64 case "reducer": 65 if len(cmdArg.Vals) != 1 { 66 return fmt.Sprintf("only one reducer expected; got %d", len(cmdArg.Vals)) 67 } 68 switch val := cmdArg.Vals[0]; val { 69 case "collect": 70 reducer = collectReducer 71 case "static": 72 reducer = staticReducer 73 default: 74 return fmt.Sprintf("unknown reducer %s", val) 75 } 76 case "probes": 77 probes = parseProbes(cmdArg.Vals...) 78 default: 79 return fmt.Sprintf("unknown command: %s", cmd) 80 } 81 } 82 var miter MergingIter 83 miter.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, spans)) 84 innerIter := attachProbes(&miter, probeContext{log: &buf}, probes...) 85 var iter DefragmentingIter 86 iter.Init(comparer, innerIter, equal, reducer, new(DefragmentingBuffers)) 87 for _, line := range strings.Split(td.Input, "\n") { 88 runIterOp(&buf, &iter, line) 89 fmt.Fprintln(&buf) 90 } 91 return strings.TrimSpace(buf.String()) 92 default: 93 return fmt.Sprintf("unrecognized command %q", td.Cmd) 94 } 95 }) 96 } 97 98 func TestDefragmentingIter_Randomized(t *testing.T) { 99 seed := time.Now().UnixNano() 100 for i := int64(0); i < 100; i++ { 101 testDefragmentingIteRandomizedOnce(t, seed+i) 102 } 103 } 104 105 func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) { 106 const seed = 1648173101214881000 107 testDefragmentingIteRandomizedOnce(t, seed) 108 } 109 110 func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) { 111 comparer := testkeys.Comparer 112 cmp := comparer.Compare 113 formatKey := comparer.FormatKey 114 115 rng := rand.New(rand.NewSource(seed)) 116 t.Logf("seed = %d", seed) 117 118 // Use a key space of alphanumeric strings, with a random max length between 119 // 1-2. Repeat keys are more common at the lower max lengths. 120 ks := testkeys.Alpha(rng.Intn(2) + 1) 121 122 // Generate between 1-15 range keys. 123 const maxRangeKeys = 15 124 var original, fragmented []Span 125 numRangeKeys := 1 + rng.Intn(maxRangeKeys) 126 for i := 0; i < numRangeKeys; i++ { 127 startIdx := rng.Int63n(ks.Count()) 128 endIdx := rng.Int63n(ks.Count()) 129 for startIdx == endIdx { 130 endIdx = rng.Int63n(ks.Count()) 131 } 132 if startIdx > endIdx { 133 startIdx, endIdx = endIdx, startIdx 134 } 135 136 key := Key{ 137 Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet), 138 Value: []byte(fmt.Sprintf("v%d", rng.Intn(3))), 139 } 140 // Generate suffixes 0, 1, 2, or 3 with 0 indicating none. 141 if suffix := rng.Int63n(4); suffix > 0 { 142 key.Suffix = testkeys.Suffix(suffix) 143 } 144 original = append(original, Span{ 145 Start: testkeys.Key(ks, startIdx), 146 End: testkeys.Key(ks, endIdx), 147 Keys: []Key{key}, 148 }) 149 150 for startIdx < endIdx { 151 width := rng.Int63n(endIdx-startIdx) + 1 152 fragmented = append(fragmented, Span{ 153 Start: testkeys.Key(ks, startIdx), 154 End: testkeys.Key(ks, startIdx+width), 155 Keys: []Key{key}, 156 }) 157 startIdx += width 158 } 159 } 160 161 // Both the original and the deliberately fragmented spans may contain 162 // overlaps, so we need to sort and fragment them. 163 original = fragment(cmp, formatKey, original) 164 fragmented = fragment(cmp, formatKey, fragmented) 165 166 var originalInner MergingIter 167 originalInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, original)) 168 var fragmentedInner MergingIter 169 fragmentedInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, fragmented)) 170 171 var referenceIter, fragmentedIter DefragmentingIter 172 referenceIter.Init(comparer, &originalInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers)) 173 fragmentedIter.Init(comparer, &fragmentedInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers)) 174 175 // Generate 100 random operations and run them against both iterators. 176 const numIterOps = 100 177 type opKind struct { 178 weight int 179 fn func() string 180 } 181 ops := []opKind{ 182 {weight: 2, fn: func() string { return "first" }}, 183 {weight: 2, fn: func() string { return "last" }}, 184 {weight: 50, fn: func() string { return "next" }}, 185 {weight: 50, fn: func() string { return "prev" }}, 186 {weight: 5, fn: func() string { 187 k := testkeys.Key(ks, rng.Int63n(ks.Count())) 188 return fmt.Sprintf("seekge(%s)", k) 189 }}, 190 {weight: 5, fn: func() string { 191 k := testkeys.Key(ks, rng.Int63n(ks.Count())) 192 return fmt.Sprintf("seeklt(%s)", k) 193 }}, 194 } 195 var totalWeight int 196 for _, op := range ops { 197 totalWeight += op.weight 198 } 199 var referenceHistory, fragmentedHistory bytes.Buffer 200 for i := 0; i < numIterOps; i++ { 201 p := rng.Intn(totalWeight) 202 opIndex := 0 203 if i == 0 { 204 // First op is always a First(). 205 } else { 206 for i, op := range ops { 207 if p < op.weight { 208 opIndex = i 209 break 210 } 211 p -= op.weight 212 } 213 } 214 op := ops[opIndex].fn() 215 runIterOp(&referenceHistory, &referenceIter, op) 216 runIterOp(&fragmentedHistory, &fragmentedIter, op) 217 if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) { 218 t.Fatal(debugContext(cmp, formatKey, original, fragmented, 219 referenceHistory.String(), fragmentedHistory.String())) 220 } 221 fmt.Fprintln(&referenceHistory) 222 fmt.Fprintln(&fragmentedHistory) 223 } 224 } 225 226 func fragment(cmp base.Compare, formatKey base.FormatKey, spans []Span) []Span { 227 Sort(cmp, spans) 228 var fragments []Span 229 f := Fragmenter{ 230 Cmp: cmp, 231 Format: formatKey, 232 Emit: func(f Span) { 233 fragments = append(fragments, f) 234 }, 235 } 236 for _, s := range spans { 237 f.Add(s) 238 } 239 f.Finish() 240 return fragments 241 } 242 243 func debugContext( 244 cmp base.Compare, 245 formatKey base.FormatKey, 246 original, fragmented []Span, 247 refHistory, fragHistory string, 248 ) string { 249 var buf bytes.Buffer 250 fmt.Fprintln(&buf, "Reference:") 251 for _, s := range original { 252 fmt.Fprintln(&buf, s) 253 } 254 fmt.Fprintln(&buf) 255 fmt.Fprintln(&buf, "Fragmented:") 256 for _, s := range fragmented { 257 fmt.Fprintln(&buf, s) 258 } 259 fmt.Fprintln(&buf) 260 fmt.Fprintln(&buf, "\nOperations diff:") 261 diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ 262 A: difflib.SplitLines(refHistory), 263 B: difflib.SplitLines(fragHistory), 264 Context: 5, 265 }) 266 if err != nil { 267 panic(err) 268 } 269 fmt.Fprintln(&buf, diff) 270 return buf.String() 271 }