github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/compaction_iter_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "io" 12 "slices" 13 "strconv" 14 "strings" 15 "testing" 16 17 "github.com/cockroachdb/datadriven" 18 "github.com/cockroachdb/pebble/internal/base" 19 "github.com/cockroachdb/pebble/internal/invalidating" 20 "github.com/cockroachdb/pebble/internal/keyspan" 21 "github.com/cockroachdb/pebble/internal/rangekey" 22 "github.com/cockroachdb/pebble/internal/testkeys" 23 "github.com/stretchr/testify/require" 24 ) 25 26 func TestSnapshotIndex(t *testing.T) { 27 testCases := []struct { 28 snapshots []uint64 29 seq uint64 30 expectedIndex int 31 expectedSeqNum uint64 32 }{ 33 {[]uint64{}, 1, 0, InternalKeySeqNumMax}, 34 {[]uint64{1}, 0, 0, 1}, 35 {[]uint64{1}, 1, 1, InternalKeySeqNumMax}, 36 {[]uint64{1}, 2, 1, InternalKeySeqNumMax}, 37 {[]uint64{1, 3}, 1, 1, 3}, 38 {[]uint64{1, 3}, 2, 1, 3}, 39 {[]uint64{1, 3}, 3, 2, InternalKeySeqNumMax}, 40 {[]uint64{1, 3}, 4, 2, InternalKeySeqNumMax}, 41 {[]uint64{1, 3, 3}, 2, 1, 3}, 42 } 43 for _, c := range testCases { 44 t.Run("", func(t *testing.T) { 45 idx, seqNum := snapshotIndex(c.seq, c.snapshots) 46 if c.expectedIndex != idx { 47 t.Fatalf("expected %d, but got %d", c.expectedIndex, idx) 48 } 49 if c.expectedSeqNum != seqNum { 50 t.Fatalf("expected %d, but got %d", c.expectedSeqNum, seqNum) 51 } 52 }) 53 } 54 } 55 56 type debugMerger struct { 57 buf []byte 58 } 59 60 func (m *debugMerger) MergeNewer(value []byte) error { 61 m.buf = append(m.buf, value...) 62 return nil 63 } 64 65 func (m *debugMerger) MergeOlder(value []byte) error { 66 buf := make([]byte, 0, len(m.buf)+len(value)) 67 buf = append(buf, value...) 68 buf = append(buf, m.buf...) 69 m.buf = buf 70 return nil 71 } 72 73 func (m *debugMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { 74 if includesBase { 75 m.buf = append(m.buf, []byte("[base]")...) 76 } 77 return m.buf, nil, nil 78 } 79 80 func TestCompactionIter(t *testing.T) { 81 var merge Merge 82 var keys []InternalKey 83 var rangeKeys []keyspan.Span 84 var vals [][]byte 85 var snapshots []uint64 86 var elideTombstones bool 87 var allowZeroSeqnum bool 88 var interleavingIter *keyspan.InterleavingIter 89 90 // The input to the data-driven test is dependent on the format major 91 // version we are testing against. 92 fileFunc := func(formatVersion FormatMajorVersion) string { 93 if formatVersion < FormatSetWithDelete { 94 return "testdata/compaction_iter" 95 } 96 if formatVersion < FormatDeleteSizedAndObsolete { 97 return "testdata/compaction_iter_set_with_del" 98 } 99 return "testdata/compaction_iter_delete_sized" 100 } 101 102 var ineffectualSingleDeleteKeys []string 103 var invariantViolationSingleDeleteKeys []string 104 resetSingleDelStats := func() { 105 ineffectualSingleDeleteKeys = ineffectualSingleDeleteKeys[:0] 106 invariantViolationSingleDeleteKeys = invariantViolationSingleDeleteKeys[:0] 107 } 108 newIter := func(formatVersion FormatMajorVersion) *compactionIter { 109 // To adhere to the existing assumption that range deletion blocks in 110 // SSTables are not released while iterating, and therefore not 111 // susceptible to use-after-free bugs, we skip the zeroing of 112 // RangeDelete keys. 113 fi := &fakeIter{keys: keys, vals: vals} 114 interleavingIter = &keyspan.InterleavingIter{} 115 interleavingIter.Init( 116 base.DefaultComparer, 117 fi, 118 keyspan.NewIter(base.DefaultComparer.Compare, rangeKeys), 119 keyspan.InterleavingIterOpts{}) 120 iter := invalidating.NewIter(interleavingIter, invalidating.IgnoreKinds(InternalKeyKindRangeDelete)) 121 if merge == nil { 122 merge = func(key, value []byte) (base.ValueMerger, error) { 123 m := &debugMerger{} 124 m.buf = append(m.buf, value...) 125 return m, nil 126 } 127 } 128 resetSingleDelStats() 129 return newCompactionIter( 130 DefaultComparer.Compare, 131 DefaultComparer.Equal, 132 DefaultComparer.FormatKey, 133 merge, 134 iter, 135 snapshots, 136 &keyspan.Fragmenter{}, 137 &keyspan.Fragmenter{}, 138 allowZeroSeqnum, 139 func([]byte) bool { 140 return elideTombstones 141 }, 142 func(_, _ []byte) bool { 143 return elideTombstones 144 }, 145 func(userKey []byte) { 146 ineffectualSingleDeleteKeys = append(ineffectualSingleDeleteKeys, string(userKey)) 147 }, 148 func(userKey []byte) { 149 invariantViolationSingleDeleteKeys = append(invariantViolationSingleDeleteKeys, string(userKey)) 150 }, 151 formatVersion, 152 ) 153 } 154 155 runTest := func(t *testing.T, formatVersion FormatMajorVersion) { 156 datadriven.RunTest(t, fileFunc(formatVersion), func(t *testing.T, d *datadriven.TestData) string { 157 switch d.Cmd { 158 case "define": 159 merge = nil 160 if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "merger" && 161 len(d.CmdArgs[0].Vals) > 0 && d.CmdArgs[0].Vals[0] == "deletable" { 162 merge = newDeletableSumValueMerger 163 } 164 keys = keys[:0] 165 vals = vals[:0] 166 rangeKeys = rangeKeys[:0] 167 for _, key := range strings.Split(d.Input, "\n") { 168 j := strings.Index(key, ":") 169 keys = append(keys, base.ParseInternalKey(key[:j])) 170 171 if strings.HasPrefix(key[j+1:], "varint(") { 172 valueStr := strings.TrimSuffix(strings.TrimPrefix(key[j+1:], "varint("), ")") 173 v, err := strconv.ParseUint(valueStr, 10, 64) 174 require.NoError(t, err) 175 encodedValue := binary.AppendUvarint([]byte(nil), v) 176 vals = append(vals, encodedValue) 177 } else { 178 vals = append(vals, []byte(key[j+1:])) 179 } 180 } 181 return "" 182 183 case "define-range-keys": 184 for _, key := range strings.Split(d.Input, "\n") { 185 s := keyspan.ParseSpan(strings.TrimSpace(key)) 186 rangeKeys = append(rangeKeys, s) 187 } 188 return "" 189 190 case "iter": 191 snapshots = snapshots[:0] 192 elideTombstones = false 193 allowZeroSeqnum = false 194 printSnapshotPinned := false 195 printMissizedDels := false 196 printForceObsolete := false 197 for _, arg := range d.CmdArgs { 198 switch arg.Key { 199 case "snapshots": 200 for _, val := range arg.Vals { 201 seqNum, err := strconv.Atoi(val) 202 if err != nil { 203 return err.Error() 204 } 205 snapshots = append(snapshots, uint64(seqNum)) 206 } 207 case "elide-tombstones": 208 var err error 209 elideTombstones, err = strconv.ParseBool(arg.Vals[0]) 210 if err != nil { 211 return err.Error() 212 } 213 case "allow-zero-seqnum": 214 var err error 215 allowZeroSeqnum, err = strconv.ParseBool(arg.Vals[0]) 216 if err != nil { 217 return err.Error() 218 } 219 case "print-snapshot-pinned": 220 printSnapshotPinned = true 221 case "print-missized-dels": 222 printMissizedDels = true 223 case "print-force-obsolete": 224 printForceObsolete = true 225 default: 226 return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key) 227 } 228 } 229 slices.Sort(snapshots) 230 231 iter := newIter(formatVersion) 232 var b bytes.Buffer 233 for _, line := range strings.Split(d.Input, "\n") { 234 parts := strings.Fields(line) 235 if len(parts) == 0 { 236 continue 237 } 238 switch parts[0] { 239 case "first": 240 iter.First() 241 case "next": 242 iter.Next() 243 case "tombstones": 244 var key []byte 245 if len(parts) == 2 { 246 key = []byte(parts[1]) 247 } 248 for _, v := range iter.Tombstones(key) { 249 for _, k := range v.Keys { 250 fmt.Fprintf(&b, "%s-%s#%d\n", v.Start, v.End, k.SeqNum()) 251 } 252 } 253 fmt.Fprintf(&b, ".\n") 254 continue 255 case "range-keys": 256 var key []byte 257 if len(parts) == 2 { 258 key = []byte(parts[1]) 259 } 260 for _, v := range iter.RangeKeys(key) { 261 fmt.Fprintf(&b, "%s\n", v) 262 } 263 fmt.Fprintf(&b, ".\n") 264 continue 265 default: 266 return fmt.Sprintf("unknown op: %s", parts[0]) 267 } 268 if iter.Valid() { 269 snapshotPinned := "" 270 if printSnapshotPinned { 271 snapshotPinned = " (not pinned)" 272 if iter.snapshotPinned { 273 snapshotPinned = " (pinned)" 274 } 275 } 276 forceObsolete := "" 277 if printForceObsolete { 278 forceObsolete = " (not force obsolete)" 279 if iter.forceObsoleteDueToRangeDel { 280 forceObsolete = " (force obsolete)" 281 } 282 } 283 v := string(iter.Value()) 284 if iter.Key().Kind() == base.InternalKeyKindDeleteSized && len(iter.Value()) > 0 { 285 vn, n := binary.Uvarint(iter.Value()) 286 if n != len(iter.Value()) { 287 v = fmt.Sprintf("err: %0x value not a uvarint", iter.Value()) 288 } else { 289 v = fmt.Sprintf("varint(%d)", vn) 290 } 291 } 292 fmt.Fprintf(&b, "%s:%s%s%s\n", iter.Key(), v, snapshotPinned, forceObsolete) 293 if iter.Key().Kind() == InternalKeyKindRangeDelete { 294 iter.rangeDelFrag.Add(keyspan.Span{ 295 Start: append([]byte{}, iter.Key().UserKey...), 296 End: append([]byte{}, iter.Value()...), 297 Keys: []keyspan.Key{ 298 {Trailer: iter.Key().Trailer}, 299 }, 300 }) 301 } 302 if rangekey.IsRangeKey(iter.Key().Kind()) { 303 iter.rangeKeyFrag.Add(*interleavingIter.Span()) 304 } 305 } else if err := iter.Error(); err != nil { 306 fmt.Fprintf(&b, "err=%v\n", err) 307 } else { 308 fmt.Fprintf(&b, ".\n") 309 } 310 } 311 if printMissizedDels { 312 fmt.Fprintf(&b, "missized-dels=%d\n", iter.stats.countMissizedDels) 313 } 314 if len(ineffectualSingleDeleteKeys) > 0 { 315 fmt.Fprintf(&b, "ineffectual-single-deletes: %s\n", 316 strings.Join(ineffectualSingleDeleteKeys, ",")) 317 } 318 if len(invariantViolationSingleDeleteKeys) > 0 { 319 fmt.Fprintf(&b, "invariant-violation-single-deletes: %s\n", 320 strings.Join(invariantViolationSingleDeleteKeys, ",")) 321 } 322 return b.String() 323 324 default: 325 return fmt.Sprintf("unknown command: %s", d.Cmd) 326 } 327 }) 328 } 329 330 // Rather than testing against all format version, we test against the 331 // significant boundaries. 332 formatVersions := []FormatMajorVersion{ 333 FormatMostCompatible, 334 FormatSetWithDelete - 1, 335 FormatSetWithDelete, 336 internalFormatNewest, 337 } 338 for _, formatVersion := range formatVersions { 339 t.Run(fmt.Sprintf("version-%s", formatVersion), func(t *testing.T) { 340 runTest(t, formatVersion) 341 }) 342 } 343 } 344 345 func TestFrontiers(t *testing.T) { 346 cmp := testkeys.Comparer.Compare 347 var keySets [][][]byte 348 datadriven.RunTest(t, "testdata/frontiers", func(t *testing.T, td *datadriven.TestData) string { 349 switch td.Cmd { 350 case "init": 351 // Init configures a frontier per line of input. Each line should 352 // contain a sorted whitespace-separated list of keys that the 353 // frontier will use. 354 // 355 // For example, the following input creates two separate monitored 356 // frontiers: one that sets its key successively to 'd', 'e', 'j' 357 // and one that sets its key to 'a', 'p', 'n', 'z': 358 // 359 // init 360 // b e j 361 // a p n z 362 363 keySets = keySets[:0] 364 for _, line := range strings.Split(td.Input, "\n") { 365 keySets = append(keySets, bytes.Fields([]byte(line))) 366 } 367 return "" 368 case "scan": 369 f := &frontiers{cmp: cmp} 370 for _, keys := range keySets { 371 initTestFrontier(f, keys...) 372 } 373 var buf bytes.Buffer 374 for _, kStr := range strings.Fields(td.Input) { 375 k := []byte(kStr) 376 f.Advance(k) 377 fmt.Fprintf(&buf, "%s : { %s }\n", kStr, f.String()) 378 } 379 return buf.String() 380 default: 381 return fmt.Sprintf("unrecognized command %q", td.Cmd) 382 } 383 }) 384 } 385 386 // initTestFrontiers adds a new frontier to f that iterates through the provided 387 // keys. The keys slice must be sorted. 388 func initTestFrontier(f *frontiers, keys ...[]byte) *frontier { 389 ff := &frontier{} 390 var key []byte 391 if len(keys) > 0 { 392 key, keys = keys[0], keys[1:] 393 } 394 reached := func(k []byte) (nextKey []byte) { 395 if len(keys) > 0 { 396 nextKey, keys = keys[0], keys[1:] 397 } 398 return nextKey 399 } 400 ff.Init(f, key, reached) 401 return ff 402 }