github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/external_iterator_test.go (about) 1 // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "fmt" 10 "math" 11 "testing" 12 "time" 13 14 "github.com/cockroachdb/datadriven" 15 "github.com/cockroachdb/errors" 16 "github.com/cockroachdb/pebble/internal/base" 17 "github.com/cockroachdb/pebble/internal/cache" 18 "github.com/cockroachdb/pebble/internal/itertest" 19 "github.com/cockroachdb/pebble/internal/testkeys" 20 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 21 "github.com/cockroachdb/pebble/sstable" 22 "github.com/cockroachdb/pebble/vfs" 23 "github.com/stretchr/testify/require" 24 "golang.org/x/exp/rand" 25 ) 26 27 func TestExternalIterator(t *testing.T) { 28 mem := vfs.NewMem() 29 o := &Options{ 30 FS: mem, 31 Comparer: testkeys.Comparer, 32 FormatMajorVersion: FormatRangeKeys, 33 } 34 o.EnsureDefaults() 35 d, err := Open("", o) 36 require.NoError(t, err) 37 defer func() { require.NoError(t, d.Close()) }() 38 39 datadriven.RunTest(t, "testdata/external_iterator", func(t *testing.T, td *datadriven.TestData) string { 40 switch td.Cmd { 41 case "reset": 42 mem = vfs.NewMem() 43 return "" 44 case "build": 45 if err := runBuildCmd(td, d, mem); err != nil { 46 return err.Error() 47 } 48 return "" 49 case "iter": 50 opts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges} 51 var externalIterOpts []ExternalIterOption 52 var files [][]sstable.ReadableFile 53 for _, arg := range td.CmdArgs { 54 switch arg.Key { 55 case "fwd-only": 56 externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{}) 57 case "mask-suffix": 58 opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) 59 case "lower": 60 opts.LowerBound = []byte(arg.Vals[0]) 61 case "upper": 62 opts.UpperBound = []byte(arg.Vals[0]) 63 case "files": 64 for _, v := range arg.Vals { 65 f, err := mem.Open(v) 66 require.NoError(t, err) 67 files = append(files, []sstable.ReadableFile{f}) 68 } 69 } 70 } 71 it, err := NewExternalIter(o, &opts, files, externalIterOpts...) 72 require.NoError(t, err) 73 return runIterCmd(td, it, true /* close iter */) 74 default: 75 return fmt.Sprintf("unknown command: %s", td.Cmd) 76 } 77 }) 78 } 79 80 func TestSimpleLevelIter(t *testing.T) { 81 mem := vfs.NewMem() 82 o := &Options{ 83 FS: mem, 84 Comparer: testkeys.Comparer, 85 FormatMajorVersion: FormatRangeKeys, 86 } 87 o.EnsureDefaults() 88 d, err := Open("", o) 89 require.NoError(t, err) 90 defer func() { require.NoError(t, d.Close()) }() 91 92 datadriven.RunTest(t, "testdata/simple_level_iter", func(t *testing.T, td *datadriven.TestData) string { 93 switch td.Cmd { 94 case "reset": 95 mem = vfs.NewMem() 96 return "" 97 case "build": 98 if err := runBuildCmd(td, d, mem); err != nil { 99 return err.Error() 100 } 101 return "" 102 case "iter": 103 var files []sstable.ReadableFile 104 var filenames []string 105 td.ScanArgs(t, "files", &filenames) 106 for _, name := range filenames { 107 f, err := mem.Open(name) 108 require.NoError(t, err) 109 files = append(files, f) 110 } 111 readers, err := openExternalTables(o, files, 0, o.MakeReaderOptions()) 112 require.NoError(t, err) 113 defer func() { 114 for i := range readers { 115 _ = readers[i].Close() 116 } 117 }() 118 var internalIters []internalIterator 119 for i := range readers { 120 iter, err := readers[i].NewIter(nil, nil) 121 require.NoError(t, err) 122 internalIters = append(internalIters, iter) 123 } 124 it := &simpleLevelIter{cmp: o.Comparer.Compare, iters: internalIters} 125 it.init(IterOptions{}) 126 127 response := itertest.RunInternalIterCmd(t, td, it) 128 require.NoError(t, it.Close()) 129 return response 130 default: 131 return fmt.Sprintf("unknown command: %s", td.Cmd) 132 } 133 }) 134 } 135 136 func TestSimpleIterError(t *testing.T) { 137 s := simpleLevelIter{cmp: DefaultComparer.Compare, iters: []internalIterator{&errorIter{err: errors.New("injected")}}} 138 s.init(IterOptions{}) 139 defer s.Close() 140 141 iterKey, _ := s.First() 142 require.Nil(t, iterKey) 143 require.Error(t, s.Error()) 144 } 145 146 func TestIterRandomizedMaybeFilteredKeys(t *testing.T) { 147 mem := vfs.NewMem() 148 149 seed := *seed 150 if seed == 0 { 151 seed = uint64(time.Now().UnixNano()) 152 t.Logf("seed: %d", seed) 153 } 154 rng := rand.New(rand.NewSource(seed)) 155 numKeys := 100 + rng.Intn(5000) 156 // The block property filter will exclude keys with suffixes [0, tsSeparator-1]. 157 // We use the first "part" of the keyspace below to write keys >= tsSeparator, 158 // and the second part to write keys < tsSeparator. Successive parts (if any) 159 // will contain keys at random before or after the separator. 160 tsSeparator := 10 + rng.Int63n(5000) 161 const keyLen = 5 162 163 // We split the keyspace into logical "parts" which are disjoint slices of the 164 // keyspace. That is, the keyspace a-z could be comprised of parts {a-k, l-z}. 165 // We rely on this partitioning when generating timestamps to give us some 166 // predictable clustering of timestamps in sstable blocks, however it is not 167 // strictly necessary for this test. 168 alpha := testkeys.Alpha(keyLen) 169 numParts := rng.Intn(3) + 2 170 blockSize := 16 + rng.Intn(64) 171 172 c := cache.New(128 << 20) 173 defer c.Unref() 174 175 for fileIdx, twoLevelIndex := range []bool{false, true} { 176 t.Run(fmt.Sprintf("twoLevelIndex=%v", twoLevelIndex), func(t *testing.T) { 177 keys := make([][]byte, 0, numKeys) 178 179 filename := fmt.Sprintf("test-%d", fileIdx) 180 f0, err := mem.Create(filename) 181 require.NoError(t, err) 182 183 indexBlockSize := 4096 184 if twoLevelIndex { 185 indexBlockSize = 1 186 } 187 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{ 188 BlockSize: blockSize, 189 Comparer: testkeys.Comparer, 190 IndexBlockSize: indexBlockSize, 191 TableFormat: sstable.TableFormatPebblev2, 192 BlockPropertyCollectors: []func() BlockPropertyCollector{ 193 func() BlockPropertyCollector { 194 return sstable.NewTestKeysBlockPropertyCollector() 195 }, 196 }, 197 }) 198 buf := make([]byte, alpha.MaxLen()+testkeys.MaxSuffixLen) 199 valBuf := make([]byte, 20) 200 keyIdx := int64(0) 201 for i := 0; i < numParts; i++ { 202 // The first two parts of the keyspace are special. The first one has 203 // all keys with timestamps greater than tsSeparator, while the second 204 // one has all keys with timestamps less than tsSeparator. Any additional 205 // keys could have timestamps at random before or after the tsSeparator. 206 maxKeysPerPart := numKeys / numParts 207 for j := 0; j < maxKeysPerPart; j++ { 208 var ts int64 209 if i == 0 { 210 ts = rng.Int63n(5000) + tsSeparator 211 } else if i == 1 { 212 ts = rng.Int63n(tsSeparator) 213 } else { 214 ts = rng.Int63n(tsSeparator + 5000) 215 } 216 n := testkeys.WriteKeyAt(buf, alpha, keyIdx*alpha.Count()/int64(numKeys), ts) 217 keys = append(keys, append([]byte(nil), buf[:n]...)) 218 randStr(valBuf, rng) 219 require.NoError(t, w.Set(buf[:n], valBuf)) 220 keyIdx++ 221 } 222 } 223 require.NoError(t, w.Close()) 224 225 // Re-open that filename for reading. 226 f1, err := mem.Open(filename) 227 require.NoError(t, err) 228 229 readable, err := sstable.NewSimpleReadable(f1) 230 require.NoError(t, err) 231 232 r, err := sstable.NewReader(readable, sstable.ReaderOptions{ 233 Cache: c, 234 Comparer: testkeys.Comparer, 235 }) 236 require.NoError(t, err) 237 defer r.Close() 238 239 filter := sstable.NewTestKeysBlockPropertyFilter(uint64(tsSeparator), math.MaxUint64) 240 filterer, err := sstable.IntersectsTable([]BlockPropertyFilter{filter}, nil, r.Properties.UserProperties) 241 require.NoError(t, err) 242 require.NotNil(t, filterer) 243 244 var iter sstable.Iterator 245 iter, err = r.NewIterWithBlockPropertyFilters( 246 nil, nil, filterer, false /* useFilterBlock */, nil, /* stats */ 247 sstable.CategoryAndQoS{}, nil, sstable.TrivialReaderProvider{Reader: r}) 248 require.NoError(t, err) 249 defer iter.Close() 250 var lastSeekKey, lowerBound, upperBound []byte 251 narrowBoundsMode := false 252 253 for i := 0; i < 10000; i++ { 254 if rng.Intn(8) == 0 { 255 // Toggle narrow bounds mode. 256 if narrowBoundsMode { 257 // Reset bounds. 258 lowerBound, upperBound = nil, nil 259 iter.SetBounds(nil /* lower */, nil /* upper */) 260 } 261 narrowBoundsMode = !narrowBoundsMode 262 } 263 keyIdx := rng.Intn(len(keys)) 264 seekKey := keys[keyIdx] 265 if narrowBoundsMode { 266 // Case 1: We just entered narrow bounds mode, and both bounds 267 // are nil. Set a lower/upper bound. 268 // 269 // Case 2: The seek key is outside our last bounds. 270 // 271 // In either case, pick a narrow range of keys to set bounds on, 272 // let's say keys[keyIdx-5] and keys[keyIdx+5], before doing our 273 // seek operation. Picking narrow bounds increases the chance of 274 // monotonic bound changes. 275 cmp := testkeys.Comparer.Compare 276 case1 := lowerBound == nil && upperBound == nil 277 case2 := (lowerBound != nil && cmp(lowerBound, seekKey) > 0) || (upperBound != nil && cmp(upperBound, seekKey) <= 0) 278 if case1 || case2 { 279 lowerBound = nil 280 if keyIdx-5 >= 0 { 281 lowerBound = keys[keyIdx-5] 282 } 283 upperBound = nil 284 if keyIdx+5 < len(keys) { 285 upperBound = keys[keyIdx+5] 286 } 287 iter.SetBounds(lowerBound, upperBound) 288 } 289 // Case 3: The current seek key is within the previously-set bounds. 290 // No need to change bounds. 291 } 292 flags := base.SeekGEFlagsNone 293 if lastSeekKey != nil && bytes.Compare(seekKey, lastSeekKey) > 0 { 294 flags = flags.EnableTrySeekUsingNext() 295 } 296 lastSeekKey = append(lastSeekKey[:0], seekKey...) 297 298 newKey, _ := iter.SeekGE(seekKey, flags) 299 if newKey == nil || !bytes.Equal(newKey.UserKey, seekKey) { 300 // We skipped some keys. Check if maybeFilteredKeys is true. 301 formattedNewKey := "<nil>" 302 if newKey != nil { 303 formattedNewKey = fmt.Sprintf("%s", testkeys.Comparer.FormatKey(newKey.UserKey)) 304 } 305 require.True(t, iter.MaybeFilteredKeys(), "seeked for key = %s, got key = %s indicating block property filtering but MaybeFilteredKeys = false", testkeys.Comparer.FormatKey(seekKey), formattedNewKey) 306 } 307 } 308 }) 309 } 310 } 311 312 func BenchmarkExternalIter_NonOverlapping_SeekNextScan(b *testing.B) { 313 ks := testkeys.Alpha(6) 314 opts := (&Options{}).EnsureDefaults() 315 iterOpts := &IterOptions{ 316 KeyTypes: IterKeyTypePointsAndRanges, 317 } 318 writeOpts := opts.MakeWriterOptions(6, sstable.TableFormatPebblev2) 319 320 for _, keyCount := range []int{100, 10_000, 100_000} { 321 b.Run(fmt.Sprintf("keys=%d", keyCount), func(b *testing.B) { 322 for _, fileCount := range []int{1, 10, 100} { 323 b.Run(fmt.Sprintf("files=%d", fileCount), func(b *testing.B) { 324 var fs vfs.FS = vfs.NewMem() 325 filenames := make([]string, fileCount) 326 var keys [][]byte 327 for i := 0; i < fileCount; i++ { 328 filename := fmt.Sprintf("%03d.sst", i) 329 wf, err := fs.Create(filename) 330 require.NoError(b, err) 331 w := sstable.NewWriter(objstorageprovider.NewFileWritable(wf), writeOpts) 332 for j := 0; j < keyCount/fileCount; j++ { 333 key := testkeys.Key(ks, int64(len(keys))) 334 keys = append(keys, key) 335 require.NoError(b, w.Set(key, key)) 336 } 337 require.NoError(b, w.Close()) 338 filenames[i] = filename 339 } 340 341 for _, forwardOnly := range []bool{false, true} { 342 b.Run(fmt.Sprintf("forward-only=%t", forwardOnly), func(b *testing.B) { 343 var externalIterOpts []ExternalIterOption 344 if forwardOnly { 345 externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{}) 346 } 347 348 for i := 0; i < b.N; i++ { 349 func() { 350 files := make([][]sstable.ReadableFile, fileCount) 351 for i := 0; i < fileCount; i++ { 352 f, err := fs.Open(filenames[i]) 353 require.NoError(b, err) 354 files[i] = []sstable.ReadableFile{f} 355 } 356 357 it, err := NewExternalIter(opts, iterOpts, files, externalIterOpts...) 358 require.NoError(b, err) 359 defer it.Close() 360 361 for k := 0; k+1 < len(keys); k += 2 { 362 if !it.SeekGE(keys[k]) { 363 b.Fatalf("key %q not found", keys[k]) 364 } 365 if !it.Next() { 366 b.Fatalf("key %q not found", keys[k+1]) 367 } 368 if !bytes.Equal(it.Key(), keys[k+1]) { 369 b.Fatalf("expected key %q, found %q", keys[k+1], it.Key()) 370 } 371 } 372 }() 373 } 374 }) 375 } 376 }) 377 } 378 }) 379 } 380 }