github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/test_fixtures.go (about) 1 // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bufio" 9 "fmt" 10 "io" 11 "math" 12 "os" 13 "path/filepath" 14 "sort" 15 "strings" 16 "sync" 17 18 "github.com/cockroachdb/pebble/bloom" 19 "github.com/cockroachdb/pebble/internal/base" 20 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 21 "github.com/cockroachdb/pebble/vfs" 22 ) 23 24 // testKVs is a key-value map holding test data. 25 type testKVs map[string]string 26 27 // SortedKeys returns the keys in the map, in sorted order. 28 func (m testKVs) SortedKeys() []string { 29 res := make([]string, 0, len(m)) 30 for k := range m { 31 res = append(res, k) 32 } 33 sort.Strings(res) 34 return res 35 } 36 37 // These variable should not be used directly, only via hamletWordCount(). 38 var hamletWordCountState struct { 39 once sync.Once 40 data testKVs 41 } 42 43 // hamletWordCount returns the data in testdata.h/txt, as a map from word to 44 // count (as string). 45 func hamletWordCount() testKVs { 46 hamletWordCountState.once.Do(func() { 47 wordCount := make(map[string]string) 48 f, err := os.Open(filepath.FromSlash("testdata/h.txt")) 49 if err != nil { 50 panic(err) 51 } 52 defer f.Close() 53 r := bufio.NewReader(f) 54 55 for { 56 s, err := r.ReadBytes('\n') 57 if err == io.EOF { 58 break 59 } 60 if err != nil { 61 panic(err) 62 } 63 k := strings.TrimSpace(string(s[8:])) 64 v := strings.TrimSpace(string(s[:8])) 65 wordCount[k] = v 66 } 67 if len(wordCount) != 1710 { 68 panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710)) 69 } 70 for _, s := range hamletNonsenseWords { 71 if _, ok := wordCount[s]; ok { 72 panic(fmt.Sprintf("nonsense word %q was in h.txt", s)) 73 } 74 } 75 hamletWordCountState.data = wordCount 76 }) 77 return hamletWordCountState.data 78 } 79 80 // hamletNonsenseWords are words that aren't in testdata/h.txt. 81 var hamletNonsenseWords = []string{ 82 // Edge cases. 83 "", 84 "\x00", 85 "\xff", 86 "`", 87 "a\x00", 88 "aaaaaa", 89 "pol\x00nius", 90 "youth\x00", 91 "youti", 92 "zzzzzz", 93 // Capitalized versions of actual words in testdata/h.txt. 94 "A", 95 "Hamlet", 96 "thEE", 97 "YOUTH", 98 // The following were generated by http://soybomb.com/tricks/words/ 99 "pectures", 100 "exectly", 101 "tricatrippian", 102 "recens", 103 "whiratroce", 104 "troped", 105 "balmous", 106 "droppewry", 107 "toilizing", 108 "crocias", 109 "eathrass", 110 "cheakden", 111 "speablett", 112 "skirinies", 113 "prefing", 114 "bonufacision", 115 } 116 117 // buildHamletTestSST creates an sst file containing the hamlet word count data, 118 // using the given options. 119 func buildHamletTestSST( 120 fs vfs.FS, 121 filename string, 122 compression Compression, 123 fp FilterPolicy, 124 ftype FilterType, 125 comparer *Comparer, 126 propCollector func() TablePropertyCollector, 127 blockSize int, 128 indexBlockSize int, 129 ) error { 130 wordCount := hamletWordCount() 131 keys := wordCount.SortedKeys() 132 133 // Write the key/value pairs to a new table, in increasing key order. 134 f0, err := fs.Create(filename) 135 if err != nil { 136 return err 137 } 138 139 writerOpts := WriterOptions{ 140 BlockSize: blockSize, 141 Comparer: comparer, 142 Compression: compression, 143 FilterPolicy: fp, 144 FilterType: ftype, 145 IndexBlockSize: indexBlockSize, 146 MergerName: "nullptr", 147 TableFormat: fixtureFormat, 148 } 149 if propCollector != nil { 150 writerOpts.TablePropertyCollectors = append(writerOpts.TablePropertyCollectors, propCollector) 151 } 152 153 w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts) 154 // Use rangeDelV1Format for testing byte equality with RocksDB. 155 w.rangeDelV1Format = true 156 var rangeDelLength int 157 var rangeDelCounter int 158 var rangeDelStart InternalKey 159 for i, k := range keys { 160 v := wordCount[k] 161 ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet) 162 if err := w.Add(ikey, []byte(v)); err != nil { 163 return err 164 } 165 // This mirrors the logic in `make-table.cc`. It adds range deletions of 166 // increasing length for every 100 keys added. 167 if i%100 == 0 { 168 rangeDelStart = ikey.Clone() 169 rangeDelCounter = 0 170 rangeDelLength++ 171 } 172 rangeDelCounter++ 173 174 if rangeDelCounter == rangeDelLength { 175 if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil { 176 return err 177 } 178 } 179 } 180 return w.Close() 181 } 182 183 // TestFixtureInfo contains all metadata necessary to generate a test sstable. 184 type TestFixtureInfo struct { 185 Filename string 186 Compression Compression 187 FullKeyFilter bool 188 PrefixFilter bool 189 IndexBlockSize int 190 UseFixtureComparer bool 191 KeyCountPropertyCollector bool 192 } 193 194 // TestFixtures contains all metadata necessary to generate the test SSTs. 195 var TestFixtures = []TestFixtureInfo{ 196 { 197 Filename: "h.sst", 198 Compression: SnappyCompression, 199 FullKeyFilter: false, 200 PrefixFilter: false, 201 IndexBlockSize: fixtureDefaultIndexBlockSize, 202 UseFixtureComparer: false, 203 KeyCountPropertyCollector: true, 204 }, 205 { 206 Filename: "h.no-compression.sst", 207 Compression: NoCompression, 208 FullKeyFilter: false, 209 PrefixFilter: false, 210 IndexBlockSize: fixtureDefaultIndexBlockSize, 211 UseFixtureComparer: false, 212 KeyCountPropertyCollector: true, 213 }, 214 { 215 Filename: "h.table-bloom.sst", 216 Compression: SnappyCompression, 217 FullKeyFilter: true, 218 PrefixFilter: false, 219 IndexBlockSize: fixtureDefaultIndexBlockSize, 220 UseFixtureComparer: false, 221 KeyCountPropertyCollector: false, 222 }, 223 { 224 Filename: "h.table-bloom.no-compression.sst", 225 Compression: NoCompression, 226 FullKeyFilter: true, 227 PrefixFilter: false, 228 IndexBlockSize: fixtureDefaultIndexBlockSize, 229 UseFixtureComparer: false, 230 KeyCountPropertyCollector: false, 231 }, 232 { 233 Filename: "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", 234 Compression: NoCompression, 235 FullKeyFilter: false, 236 PrefixFilter: true, 237 IndexBlockSize: fixtureDefaultIndexBlockSize, 238 UseFixtureComparer: true, 239 KeyCountPropertyCollector: false, 240 }, 241 { 242 Filename: "h.no-compression.two_level_index.sst", 243 Compression: NoCompression, 244 FullKeyFilter: false, 245 PrefixFilter: false, 246 IndexBlockSize: fixtureSmallIndexBlockSize, 247 UseFixtureComparer: false, 248 KeyCountPropertyCollector: true, 249 }, 250 { 251 Filename: "h.zstd-compression.sst", 252 Compression: ZstdCompression, 253 FullKeyFilter: false, 254 PrefixFilter: false, 255 IndexBlockSize: fixtureDefaultIndexBlockSize, 256 UseFixtureComparer: false, 257 KeyCountPropertyCollector: true, 258 }, 259 } 260 261 // Build creates an sst file for the given fixture. 262 func (tf TestFixtureInfo) Build(fs vfs.FS, filename string) error { 263 var fp base.FilterPolicy 264 if tf.FullKeyFilter || tf.PrefixFilter { 265 fp = bloom.FilterPolicy(10) 266 } 267 var comparer *Comparer 268 if tf.UseFixtureComparer { 269 comparer = fixtureComparer 270 } 271 var propCollector func() TablePropertyCollector 272 if tf.KeyCountPropertyCollector { 273 propCollector = func() TablePropertyCollector { 274 return &keyCountPropertyCollector{} 275 } 276 } 277 278 return buildHamletTestSST( 279 fs, filename, tf.Compression, fp, base.TableFilter, 280 comparer, 281 propCollector, 282 fixtureBlockSize, 283 tf.IndexBlockSize, 284 ) 285 } 286 287 const fixtureDefaultIndexBlockSize = math.MaxInt32 288 const fixtureSmallIndexBlockSize = 128 289 const fixtureBlockSize = 2048 290 const fixtureFormat = TableFormatPebblev1 291 292 type keyCountPropertyCollector struct { 293 count int 294 } 295 296 func (c *keyCountPropertyCollector) Add(key InternalKey, value []byte) error { 297 c.count++ 298 return nil 299 } 300 301 func (c *keyCountPropertyCollector) Finish(userProps map[string]string) error { 302 userProps["test.key-count"] = fmt.Sprint(c.count) 303 return nil 304 } 305 306 func (c *keyCountPropertyCollector) Name() string { 307 return "KeyCountPropertyCollector" 308 } 309 310 var fixtureComparer = func() *Comparer { 311 c := *base.DefaultComparer 312 // NB: this is named as such only to match the built-in RocksDB comparer. 313 c.Name = "leveldb.BytewiseComparator" 314 c.Split = func(a []byte) int { 315 // TODO(tbg): It's difficult to provide a more meaningful prefix extractor 316 // on the given dataset since it's not MVCC, and so it's impossible to come 317 // up with a sensible one. We need to add a better dataset and use that 318 // instead to get confidence that prefix extractors are working as intended. 319 return len(a) 320 } 321 return &c 322 }()