github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/compaction_iter_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "io" 12 "sort" 13 "strconv" 14 "strings" 15 "testing" 16 17 "github.com/cockroachdb/datadriven" 18 "github.com/cockroachdb/pebble/internal/base" 19 "github.com/cockroachdb/pebble/internal/invalidating" 20 "github.com/cockroachdb/pebble/internal/keyspan" 21 "github.com/cockroachdb/pebble/internal/rangekey" 22 "github.com/cockroachdb/pebble/internal/testkeys" 23 "github.com/stretchr/testify/require" 24 ) 25 26 func TestSnapshotIndex(t *testing.T) { 27 testCases := []struct { 28 snapshots []uint64 29 seq uint64 30 expectedIndex int 31 expectedSeqNum uint64 32 }{ 33 {[]uint64{}, 1, 0, InternalKeySeqNumMax}, 34 {[]uint64{1}, 0, 0, 1}, 35 {[]uint64{1}, 1, 1, InternalKeySeqNumMax}, 36 {[]uint64{1}, 2, 1, InternalKeySeqNumMax}, 37 {[]uint64{1, 3}, 1, 1, 3}, 38 {[]uint64{1, 3}, 2, 1, 3}, 39 {[]uint64{1, 3}, 3, 2, InternalKeySeqNumMax}, 40 {[]uint64{1, 3}, 4, 2, InternalKeySeqNumMax}, 41 {[]uint64{1, 3, 3}, 2, 1, 3}, 42 } 43 for _, c := range testCases { 44 t.Run("", func(t *testing.T) { 45 idx, seqNum := snapshotIndex(c.seq, c.snapshots) 46 if c.expectedIndex != idx { 47 t.Fatalf("expected %d, but got %d", c.expectedIndex, idx) 48 } 49 if c.expectedSeqNum != seqNum { 50 t.Fatalf("expected %d, but got %d", c.expectedSeqNum, seqNum) 51 } 52 }) 53 } 54 } 55 56 type debugMerger struct { 57 buf []byte 58 } 59 60 func (m *debugMerger) MergeNewer(value []byte) error { 61 m.buf = append(m.buf, value...) 62 return nil 63 } 64 65 func (m *debugMerger) MergeOlder(value []byte) error { 66 buf := make([]byte, 0, len(m.buf)+len(value)) 67 buf = append(buf, value...) 68 buf = append(buf, m.buf...) 69 m.buf = buf 70 return nil 71 } 72 73 func (m *debugMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { 74 if includesBase { 75 m.buf = append(m.buf, []byte("[base]")...) 76 } 77 return m.buf, nil, nil 78 } 79 80 func TestCompactionIter(t *testing.T) { 81 var merge Merge 82 var keys []InternalKey 83 var rangeKeys []keyspan.Span 84 var vals [][]byte 85 var snapshots []uint64 86 var elideTombstones bool 87 var allowZeroSeqnum bool 88 var interleavingIter *keyspan.InterleavingIter 89 90 // The input to the data-driven test is dependent on the format major 91 // version we are testing against. 92 fileFunc := func(formatVersion FormatMajorVersion) string { 93 if formatVersion < FormatSetWithDelete { 94 return "testdata/compaction_iter" 95 } 96 if formatVersion < FormatDeleteSizedAndObsolete { 97 return "testdata/compaction_iter_set_with_del" 98 } 99 return "testdata/compaction_iter_delete_sized" 100 } 101 102 var ineffectualSingleDeleteKeys []string 103 var invariantViolationSingleDeleteKeys []string 104 resetSingleDelStats := func() { 105 ineffectualSingleDeleteKeys = ineffectualSingleDeleteKeys[:0] 106 invariantViolationSingleDeleteKeys = invariantViolationSingleDeleteKeys[:0] 107 } 108 newIter := func(formatVersion FormatMajorVersion) *compactionIter { 109 // To adhere to the existing assumption that range deletion blocks in 110 // SSTables are not released while iterating, and therefore not 111 // susceptible to use-after-free bugs, we skip the zeroing of 112 // RangeDelete keys. 113 fi := &fakeIter{keys: keys, vals: vals} 114 interleavingIter = &keyspan.InterleavingIter{} 115 interleavingIter.Init( 116 base.DefaultComparer, 117 fi, 118 keyspan.NewIter(base.DefaultComparer.Compare, rangeKeys), 119 keyspan.InterleavingIterOpts{}) 120 iter := invalidating.NewIter(interleavingIter, invalidating.IgnoreKinds(InternalKeyKindRangeDelete)) 121 if merge == nil { 122 merge = func(key, value []byte) (base.ValueMerger, error) { 123 m := &debugMerger{} 124 m.buf = append(m.buf, value...) 125 return m, nil 126 } 127 } 128 resetSingleDelStats() 129 return newCompactionIter( 130 DefaultComparer.Compare, 131 DefaultComparer.Equal, 132 DefaultComparer.FormatKey, 133 merge, 134 iter, 135 snapshots, 136 &keyspan.Fragmenter{}, 137 &keyspan.Fragmenter{}, 138 allowZeroSeqnum, 139 func([]byte) bool { 140 return elideTombstones 141 }, 142 func(_, _ []byte) bool { 143 return elideTombstones 144 }, 145 func(userKey []byte) { 146 ineffectualSingleDeleteKeys = append(ineffectualSingleDeleteKeys, string(userKey)) 147 }, 148 func(userKey []byte) { 149 invariantViolationSingleDeleteKeys = append(invariantViolationSingleDeleteKeys, string(userKey)) 150 }, 151 formatVersion, 152 ) 153 } 154 155 runTest := func(t *testing.T, formatVersion FormatMajorVersion) { 156 datadriven.RunTest(t, fileFunc(formatVersion), func(t *testing.T, d *datadriven.TestData) string { 157 switch d.Cmd { 158 case "define": 159 merge = nil 160 if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "merger" && 161 len(d.CmdArgs[0].Vals) > 0 && d.CmdArgs[0].Vals[0] == "deletable" { 162 merge = newDeletableSumValueMerger 163 } 164 keys = keys[:0] 165 vals = vals[:0] 166 rangeKeys = rangeKeys[:0] 167 for _, key := range strings.Split(d.Input, "\n") { 168 j := strings.Index(key, ":") 169 keys = append(keys, base.ParseInternalKey(key[:j])) 170 171 if strings.HasPrefix(key[j+1:], "varint(") { 172 valueStr := strings.TrimSuffix(strings.TrimPrefix(key[j+1:], "varint("), ")") 173 v, err := strconv.ParseUint(valueStr, 10, 64) 174 require.NoError(t, err) 175 encodedValue := binary.AppendUvarint([]byte(nil), v) 176 vals = append(vals, encodedValue) 177 } else { 178 vals = append(vals, []byte(key[j+1:])) 179 } 180 } 181 return "" 182 183 case "define-range-keys": 184 for _, key := range strings.Split(d.Input, "\n") { 185 s := keyspan.ParseSpan(strings.TrimSpace(key)) 186 rangeKeys = append(rangeKeys, s) 187 } 188 return "" 189 190 case "iter": 191 snapshots = snapshots[:0] 192 elideTombstones = false 193 allowZeroSeqnum = false 194 printSnapshotPinned := false 195 printMissizedDels := false 196 printForceObsolete := false 197 for _, arg := range d.CmdArgs { 198 switch arg.Key { 199 case "snapshots": 200 for _, val := range arg.Vals { 201 seqNum, err := strconv.Atoi(val) 202 if err != nil { 203 return err.Error() 204 } 205 snapshots = append(snapshots, uint64(seqNum)) 206 } 207 case "elide-tombstones": 208 var err error 209 elideTombstones, err = strconv.ParseBool(arg.Vals[0]) 210 if err != nil { 211 return err.Error() 212 } 213 case "allow-zero-seqnum": 214 var err error 215 allowZeroSeqnum, err = strconv.ParseBool(arg.Vals[0]) 216 if err != nil { 217 return err.Error() 218 } 219 case "print-snapshot-pinned": 220 printSnapshotPinned = true 221 case "print-missized-dels": 222 printMissizedDels = true 223 case "print-force-obsolete": 224 printForceObsolete = true 225 default: 226 return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key) 227 } 228 } 229 sort.Slice(snapshots, func(i, j int) bool { 230 return snapshots[i] < snapshots[j] 231 }) 232 233 iter := newIter(formatVersion) 234 var b bytes.Buffer 235 for _, line := range strings.Split(d.Input, "\n") { 236 parts := strings.Fields(line) 237 if len(parts) == 0 { 238 continue 239 } 240 switch parts[0] { 241 case "first": 242 iter.First() 243 case "next": 244 iter.Next() 245 case "tombstones": 246 var key []byte 247 if len(parts) == 2 { 248 key = []byte(parts[1]) 249 } 250 for _, v := range iter.Tombstones(key) { 251 for _, k := range v.Keys { 252 fmt.Fprintf(&b, "%s-%s#%d\n", v.Start, v.End, k.SeqNum()) 253 } 254 } 255 fmt.Fprintf(&b, ".\n") 256 continue 257 case "range-keys": 258 var key []byte 259 if len(parts) == 2 { 260 key = []byte(parts[1]) 261 } 262 for _, v := range iter.RangeKeys(key) { 263 fmt.Fprintf(&b, "%s\n", v) 264 } 265 fmt.Fprintf(&b, ".\n") 266 continue 267 default: 268 return fmt.Sprintf("unknown op: %s", parts[0]) 269 } 270 if iter.Valid() { 271 snapshotPinned := "" 272 if printSnapshotPinned { 273 snapshotPinned = " (not pinned)" 274 if iter.snapshotPinned { 275 snapshotPinned = " (pinned)" 276 } 277 } 278 forceObsolete := "" 279 if printForceObsolete { 280 forceObsolete = " (not force obsolete)" 281 if iter.forceObsoleteDueToRangeDel { 282 forceObsolete = " (force obsolete)" 283 } 284 } 285 v := string(iter.Value()) 286 if iter.Key().Kind() == base.InternalKeyKindDeleteSized && len(iter.Value()) > 0 { 287 vn, n := binary.Uvarint(iter.Value()) 288 if n != len(iter.Value()) { 289 v = fmt.Sprintf("err: %0x value not a uvarint", iter.Value()) 290 } else { 291 v = fmt.Sprintf("varint(%d)", vn) 292 } 293 } 294 fmt.Fprintf(&b, "%s:%s%s%s\n", iter.Key(), v, snapshotPinned, forceObsolete) 295 if iter.Key().Kind() == InternalKeyKindRangeDelete { 296 iter.rangeDelFrag.Add(keyspan.Span{ 297 Start: append([]byte{}, iter.Key().UserKey...), 298 End: append([]byte{}, iter.Value()...), 299 Keys: []keyspan.Key{ 300 {Trailer: iter.Key().Trailer}, 301 }, 302 }) 303 } 304 if rangekey.IsRangeKey(iter.Key().Kind()) { 305 iter.rangeKeyFrag.Add(*interleavingIter.Span()) 306 } 307 } else if err := iter.Error(); err != nil { 308 fmt.Fprintf(&b, "err=%v\n", err) 309 } else { 310 fmt.Fprintf(&b, ".\n") 311 } 312 } 313 if printMissizedDels { 314 fmt.Fprintf(&b, "missized-dels=%d\n", iter.stats.countMissizedDels) 315 } 316 if len(ineffectualSingleDeleteKeys) > 0 { 317 fmt.Fprintf(&b, "ineffectual-single-deletes: %s\n", 318 strings.Join(ineffectualSingleDeleteKeys, ",")) 319 } 320 if len(invariantViolationSingleDeleteKeys) > 0 { 321 fmt.Fprintf(&b, "invariant-violation-single-deletes: %s\n", 322 strings.Join(invariantViolationSingleDeleteKeys, ",")) 323 } 324 return b.String() 325 326 default: 327 return fmt.Sprintf("unknown command: %s", d.Cmd) 328 } 329 }) 330 } 331 332 // Rather than testing against all format version, we test against the 333 // significant boundaries. 334 formatVersions := []FormatMajorVersion{ 335 FormatMostCompatible, 336 FormatSetWithDelete - 1, 337 FormatSetWithDelete, 338 internalFormatNewest, 339 } 340 for _, formatVersion := range formatVersions { 341 t.Run(fmt.Sprintf("version-%s", formatVersion), func(t *testing.T) { 342 runTest(t, formatVersion) 343 }) 344 } 345 } 346 347 func TestFrontiers(t *testing.T) { 348 cmp := testkeys.Comparer.Compare 349 var keySets [][][]byte 350 datadriven.RunTest(t, "testdata/frontiers", func(t *testing.T, td *datadriven.TestData) string { 351 switch td.Cmd { 352 case "init": 353 // Init configures a frontier per line of input. Each line should 354 // contain a sorted whitespace-separated list of keys that the 355 // frontier will use. 356 // 357 // For example, the following input creates two separate monitored 358 // frontiers: one that sets its key successively to 'd', 'e', 'j' 359 // and one that sets its key to 'a', 'p', 'n', 'z': 360 // 361 // init 362 // b e j 363 // a p n z 364 365 keySets = keySets[:0] 366 for _, line := range strings.Split(td.Input, "\n") { 367 keySets = append(keySets, bytes.Fields([]byte(line))) 368 } 369 return "" 370 case "scan": 371 f := &frontiers{cmp: cmp} 372 for _, keys := range keySets { 373 initTestFrontier(f, keys...) 374 } 375 var buf bytes.Buffer 376 for _, kStr := range strings.Fields(td.Input) { 377 k := []byte(kStr) 378 f.Advance(k) 379 fmt.Fprintf(&buf, "%s : { %s }\n", kStr, f.String()) 380 } 381 return buf.String() 382 default: 383 return fmt.Sprintf("unrecognized command %q", td.Cmd) 384 } 385 }) 386 } 387 388 // initTestFrontiers adds a new frontier to f that iterates through the provided 389 // keys. The keys slice must be sorted. 390 func initTestFrontier(f *frontiers, keys ...[]byte) *frontier { 391 ff := &frontier{} 392 var key []byte 393 if len(keys) > 0 { 394 key, keys = keys[0], keys[1:] 395 } 396 reached := func(k []byte) (nextKey []byte) { 397 if len(keys) > 0 { 398 nextKey, keys = keys[0], keys[1:] 399 } 400 return nextKey 401 } 402 ff.Init(f, key, reached) 403 return ff 404 }