github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/rangekey/coalesce_test.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package rangekey 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "math/rand" 12 "strconv" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/pmezard/go-difflib/difflib" 18 "github.com/stretchr/testify/require" 19 "github.com/zuoyebang/bitalostable/internal/base" 20 "github.com/zuoyebang/bitalostable/internal/datadriven" 21 "github.com/zuoyebang/bitalostable/internal/keyspan" 22 "github.com/zuoyebang/bitalostable/internal/testkeys" 23 ) 24 25 func TestCoalesce(t *testing.T) { 26 var buf bytes.Buffer 27 cmp := testkeys.Comparer.Compare 28 29 datadriven.RunTest(t, "testdata/coalesce", func(td *datadriven.TestData) string { 30 switch td.Cmd { 31 case "coalesce": 32 buf.Reset() 33 span := keyspan.ParseSpan(td.Input) 34 coalesced := keyspan.Span{ 35 Start: span.Start, 36 End: span.End, 37 } 38 if err := Coalesce(cmp, span.Keys, &coalesced.Keys); err != nil { 39 return err.Error() 40 } 41 fmt.Fprintln(&buf, coalesced) 42 return buf.String() 43 default: 44 return fmt.Sprintf("unrecognized command %q", td.Cmd) 45 } 46 }) 47 } 48 49 func TestIter(t *testing.T) { 50 cmp := testkeys.Comparer.Compare 51 var iter keyspan.MergingIter 52 var buf bytes.Buffer 53 54 datadriven.RunTest(t, "testdata/iter", func(td *datadriven.TestData) string { 55 buf.Reset() 56 switch td.Cmd { 57 case "define": 58 visibleSeqNum := base.InternalKeySeqNumMax 59 for _, arg := range td.CmdArgs { 60 if arg.Key == "visible-seq-num" { 61 var err error 62 visibleSeqNum, err = strconv.ParseUint(arg.Vals[0], 10, 64) 63 require.NoError(t, err) 64 } 65 } 66 67 var spans []keyspan.Span 68 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 69 for _, line := range lines { 70 spans = append(spans, keyspan.ParseSpan(line)) 71 } 72 transform := keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error { 73 s = s.Visible(visibleSeqNum) 74 dst.Start = s.Start 75 dst.End = s.End 76 return Coalesce(cmp, s.Keys, &dst.Keys) 77 }) 78 iter.Init(cmp, transform, keyspan.NewIter(cmp, spans)) 79 return "OK" 80 case "iter": 81 buf.Reset() 82 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 83 for _, line := range lines { 84 line = strings.TrimSpace(line) 85 i := strings.IndexByte(line, ' ') 86 iterCmd := line 87 if i > 0 { 88 iterCmd = string(line[:i]) 89 } 90 var s *keyspan.Span 91 switch iterCmd { 92 case "first": 93 s = iter.First() 94 case "last": 95 s = iter.Last() 96 case "next": 97 s = iter.Next() 98 case "prev": 99 s = iter.Prev() 100 case "seek-ge": 101 s = iter.SeekGE([]byte(strings.TrimSpace(line[i:]))) 102 case "seek-lt": 103 s = iter.SeekLT([]byte(strings.TrimSpace(line[i:]))) 104 default: 105 return fmt.Sprintf("unrecognized iter command %q", iterCmd) 106 } 107 require.NoError(t, iter.Error()) 108 fmt.Fprint(&buf, s) 109 if buf.Len() > 0 { 110 fmt.Fprintln(&buf) 111 } 112 } 113 return buf.String() 114 default: 115 return fmt.Sprintf("unrecognized command %q", td.Cmd) 116 } 117 }) 118 } 119 120 func TestDefragmenting(t *testing.T) { 121 cmp := testkeys.Comparer.Compare 122 123 var buf bytes.Buffer 124 var spans []keyspan.Span 125 var hasPrefix bool 126 var prefix []byte 127 datadriven.RunTest(t, "testdata/defragmenting_iter", func(td *datadriven.TestData) string { 128 buf.Reset() 129 switch td.Cmd { 130 case "define": 131 spans = spans[:0] 132 lines := strings.Split(strings.TrimSpace(td.Input), "\n") 133 for _, line := range lines { 134 spans = append(spans, keyspan.ParseSpan(line)) 135 } 136 return "" 137 case "iter": 138 var userIterCfg UserIteratorConfig 139 iter := userIterCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 140 nil /* lower */, nil, /* upper */ 141 &hasPrefix, &prefix, 142 keyspan.NewIter(cmp, spans)) 143 for _, line := range strings.Split(td.Input, "\n") { 144 runIterOp(&buf, iter, line) 145 } 146 return strings.TrimSpace(buf.String()) 147 default: 148 return fmt.Sprintf("unrecognized command %q", td.Cmd) 149 } 150 }) 151 } 152 153 func TestDefragmentingIter_Randomized(t *testing.T) { 154 seed := time.Now().UnixNano() 155 for i := int64(0); i < 100; i++ { 156 testDefragmentingIteRandomizedOnce(t, seed+i) 157 } 158 } 159 160 func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) { 161 const seed = 1648173101214881000 162 testDefragmentingIteRandomizedOnce(t, seed) 163 } 164 165 func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) { 166 cmp := testkeys.Comparer.Compare 167 formatKey := testkeys.Comparer.FormatKey 168 169 rng := rand.New(rand.NewSource(seed)) 170 t.Logf("seed = %d", seed) 171 172 // Use a key space of alphanumeric strings, with a random max length between 173 // 1-2. Repeat keys are more common at the lower max lengths. 174 ks := testkeys.Alpha(rng.Intn(2) + 1) 175 176 // Generate between 1-15 range keys. 177 const maxRangeKeys = 15 178 var original, fragmented []keyspan.Span 179 numRangeKeys := 1 + rng.Intn(maxRangeKeys) 180 for i := 0; i < numRangeKeys; i++ { 181 startIdx := rng.Intn(ks.Count()) 182 endIdx := rng.Intn(ks.Count()) 183 for startIdx == endIdx { 184 endIdx = rng.Intn(ks.Count()) 185 } 186 if startIdx > endIdx { 187 startIdx, endIdx = endIdx, startIdx 188 } 189 190 key := keyspan.Key{ 191 Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet), 192 Value: []byte(fmt.Sprintf("v%d", rng.Intn(3))), 193 } 194 // Generate suffixes 0, 1, 2, or 3 with 0 indicating none. 195 if suffix := rng.Intn(4); suffix > 0 { 196 key.Suffix = testkeys.Suffix(suffix) 197 } 198 original = append(original, keyspan.Span{ 199 Start: testkeys.Key(ks, startIdx), 200 End: testkeys.Key(ks, endIdx), 201 Keys: []keyspan.Key{key}, 202 }) 203 204 for startIdx < endIdx { 205 width := rng.Intn(endIdx-startIdx) + 1 206 fragmented = append(fragmented, keyspan.Span{ 207 Start: testkeys.Key(ks, startIdx), 208 End: testkeys.Key(ks, startIdx+width), 209 Keys: []keyspan.Key{key}, 210 }) 211 startIdx += width 212 } 213 } 214 215 // Both the original and the deliberately fragmented spans may contain 216 // overlaps, so we need to sort and fragment them. 217 original = fragment(cmp, formatKey, original) 218 fragmented = fragment(cmp, formatKey, fragmented) 219 220 var referenceCfg, fragmentedCfg UserIteratorConfig 221 referenceIter := referenceCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 222 nil /* lower */, nil, /* upper */ 223 new(bool), new([]byte), 224 keyspan.NewIter(cmp, original)) 225 fragmentedIter := fragmentedCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax, 226 nil /* lower */, nil, /* upper */ 227 new(bool), new([]byte), 228 keyspan.NewIter(cmp, fragmented)) 229 230 // Generate 100 random operations and run them against both iterators. 231 const numIterOps = 100 232 type opKind struct { 233 weight int 234 fn func() string 235 } 236 ops := []opKind{ 237 {weight: 2, fn: func() string { return "first" }}, 238 {weight: 2, fn: func() string { return "last" }}, 239 {weight: 50, fn: func() string { return "next" }}, 240 {weight: 50, fn: func() string { return "prev" }}, 241 {weight: 5, fn: func() string { 242 k := testkeys.Key(ks, rng.Intn(ks.Count())) 243 return fmt.Sprintf("seekge(%s)", k) 244 }}, 245 {weight: 5, fn: func() string { 246 k := testkeys.Key(ks, rng.Intn(ks.Count())) 247 return fmt.Sprintf("seeklt(%s)", k) 248 }}, 249 } 250 var totalWeight int 251 for _, op := range ops { 252 totalWeight += op.weight 253 } 254 var referenceHistory, fragmentedHistory bytes.Buffer 255 for i := 0; i < numIterOps; i++ { 256 p := rng.Intn(totalWeight) 257 opIndex := 0 258 if i == 0 { 259 // First op is always a First(). 260 } else { 261 for i, op := range ops { 262 if p < op.weight { 263 opIndex = i 264 break 265 } 266 p -= op.weight 267 } 268 } 269 op := ops[opIndex].fn() 270 runIterOp(&referenceHistory, referenceIter, op) 271 runIterOp(&fragmentedHistory, fragmentedIter, op) 272 if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) { 273 t.Fatal(debugContext(cmp, formatKey, original, fragmented, 274 referenceHistory.String(), fragmentedHistory.String())) 275 } 276 } 277 } 278 279 func fragment(cmp base.Compare, formatKey base.FormatKey, spans []keyspan.Span) []keyspan.Span { 280 keyspan.Sort(cmp, spans) 281 var fragments []keyspan.Span 282 f := keyspan.Fragmenter{ 283 Cmp: cmp, 284 Format: formatKey, 285 Emit: func(f keyspan.Span) { 286 fragments = append(fragments, f) 287 }, 288 } 289 for _, s := range spans { 290 f.Add(s) 291 } 292 f.Finish() 293 return fragments 294 } 295 296 func debugContext( 297 cmp base.Compare, 298 formatKey base.FormatKey, 299 original, fragmented []keyspan.Span, 300 refHistory, fragHistory string, 301 ) string { 302 var buf bytes.Buffer 303 fmt.Fprintln(&buf, "Reference:") 304 for _, s := range original { 305 fmt.Fprintln(&buf, s) 306 } 307 fmt.Fprintln(&buf) 308 fmt.Fprintln(&buf, "Fragmented:") 309 for _, s := range fragmented { 310 fmt.Fprintln(&buf, s) 311 } 312 fmt.Fprintln(&buf) 313 fmt.Fprintln(&buf, "\nOperations diff:") 314 diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ 315 A: difflib.SplitLines(refHistory), 316 B: difflib.SplitLines(fragHistory), 317 Context: 5, 318 }) 319 if err != nil { 320 panic(err) 321 } 322 fmt.Fprintln(&buf, diff) 323 return buf.String() 324 } 325 326 var iterDelim = map[rune]bool{',': true, ' ': true, '(': true, ')': true, '"': true} 327 328 func runIterOp(w io.Writer, it keyspan.FragmentIterator, op string) { 329 fields := strings.FieldsFunc(op, func(r rune) bool { return iterDelim[r] }) 330 var s *keyspan.Span 331 switch strings.ToLower(fields[0]) { 332 case "first": 333 s = it.First() 334 case "last": 335 s = it.Last() 336 case "seekge": 337 s = it.SeekGE([]byte(fields[1])) 338 case "seeklt": 339 s = it.SeekLT([]byte(fields[1])) 340 case "next": 341 s = it.Next() 342 case "prev": 343 s = it.Prev() 344 default: 345 panic(fmt.Sprintf("unrecognized iter op %q", fields[0])) 346 } 347 fmt.Fprintf(w, "%-10s", op) 348 if s == nil { 349 fmt.Fprintln(w, ".") 350 return 351 } 352 fmt.Fprintln(w, s) 353 }