github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/test_fixtures.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bufio"
     9  	"fmt"
    10  	"io"
    11  	"math"
    12  	"os"
    13  	"path/filepath"
    14  	"sort"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/cockroachdb/pebble/bloom"
    19  	"github.com/cockroachdb/pebble/internal/base"
    20  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    21  	"github.com/cockroachdb/pebble/vfs"
    22  )
    23  
    24  // testKVs is a key-value map holding test data.
    25  type testKVs map[string]string
    26  
    27  // SortedKeys returns the keys in the map, in sorted order.
    28  func (m testKVs) SortedKeys() []string {
    29  	res := make([]string, 0, len(m))
    30  	for k := range m {
    31  		res = append(res, k)
    32  	}
    33  	sort.Strings(res)
    34  	return res
    35  }
    36  
    37  // These variable should not be used directly, only via hamletWordCount().
    38  var hamletWordCountState struct {
    39  	once sync.Once
    40  	data testKVs
    41  }
    42  
    43  // hamletWordCount returns the data in testdata.h/txt, as a map from word to
    44  // count (as string).
    45  func hamletWordCount() testKVs {
    46  	hamletWordCountState.once.Do(func() {
    47  		wordCount := make(map[string]string)
    48  		f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
    49  		if err != nil {
    50  			panic(err)
    51  		}
    52  		defer f.Close()
    53  		r := bufio.NewReader(f)
    54  
    55  		for {
    56  			s, err := r.ReadBytes('\n')
    57  			if err == io.EOF {
    58  				break
    59  			}
    60  			if err != nil {
    61  				panic(err)
    62  			}
    63  			k := strings.TrimSpace(string(s[8:]))
    64  			v := strings.TrimSpace(string(s[:8]))
    65  			wordCount[k] = v
    66  		}
    67  		if len(wordCount) != 1710 {
    68  			panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
    69  		}
    70  		for _, s := range hamletNonsenseWords {
    71  			if _, ok := wordCount[s]; ok {
    72  				panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
    73  			}
    74  		}
    75  		hamletWordCountState.data = wordCount
    76  	})
    77  	return hamletWordCountState.data
    78  }
    79  
    80  // hamletNonsenseWords are words that aren't in testdata/h.txt.
    81  var hamletNonsenseWords = []string{
    82  	// Edge cases.
    83  	"",
    84  	"\x00",
    85  	"\xff",
    86  	"`",
    87  	"a\x00",
    88  	"aaaaaa",
    89  	"pol\x00nius",
    90  	"youth\x00",
    91  	"youti",
    92  	"zzzzzz",
    93  	// Capitalized versions of actual words in testdata/h.txt.
    94  	"A",
    95  	"Hamlet",
    96  	"thEE",
    97  	"YOUTH",
    98  	// The following were generated by http://soybomb.com/tricks/words/
    99  	"pectures",
   100  	"exectly",
   101  	"tricatrippian",
   102  	"recens",
   103  	"whiratroce",
   104  	"troped",
   105  	"balmous",
   106  	"droppewry",
   107  	"toilizing",
   108  	"crocias",
   109  	"eathrass",
   110  	"cheakden",
   111  	"speablett",
   112  	"skirinies",
   113  	"prefing",
   114  	"bonufacision",
   115  }
   116  
   117  // buildHamletTestSST creates an sst file containing the hamlet word count data,
   118  // using the given options.
   119  func buildHamletTestSST(
   120  	fs vfs.FS,
   121  	filename string,
   122  	compression Compression,
   123  	fp FilterPolicy,
   124  	ftype FilterType,
   125  	comparer *Comparer,
   126  	propCollector func() TablePropertyCollector,
   127  	blockSize int,
   128  	indexBlockSize int,
   129  ) error {
   130  	wordCount := hamletWordCount()
   131  	keys := wordCount.SortedKeys()
   132  
   133  	// Write the key/value pairs to a new table, in increasing key order.
   134  	f0, err := fs.Create(filename)
   135  	if err != nil {
   136  		return err
   137  	}
   138  
   139  	writerOpts := WriterOptions{
   140  		BlockSize:      blockSize,
   141  		Comparer:       comparer,
   142  		Compression:    compression,
   143  		FilterPolicy:   fp,
   144  		FilterType:     ftype,
   145  		IndexBlockSize: indexBlockSize,
   146  		MergerName:     "nullptr",
   147  		TableFormat:    fixtureFormat,
   148  	}
   149  	if propCollector != nil {
   150  		writerOpts.TablePropertyCollectors = append(writerOpts.TablePropertyCollectors, propCollector)
   151  	}
   152  
   153  	w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts)
   154  	// Use rangeDelV1Format for testing byte equality with RocksDB.
   155  	w.rangeDelV1Format = true
   156  	var rangeDelLength int
   157  	var rangeDelCounter int
   158  	var rangeDelStart InternalKey
   159  	for i, k := range keys {
   160  		v := wordCount[k]
   161  		ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet)
   162  		if err := w.Add(ikey, []byte(v)); err != nil {
   163  			return err
   164  		}
   165  		// This mirrors the logic in `make-table.cc`. It adds range deletions of
   166  		// increasing length for every 100 keys added.
   167  		if i%100 == 0 {
   168  			rangeDelStart = ikey.Clone()
   169  			rangeDelCounter = 0
   170  			rangeDelLength++
   171  		}
   172  		rangeDelCounter++
   173  
   174  		if rangeDelCounter == rangeDelLength {
   175  			if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil {
   176  				return err
   177  			}
   178  		}
   179  	}
   180  	return w.Close()
   181  }
   182  
// TestFixtureInfo contains all metadata necessary to generate a test sstable.
//
// Build consumes the fields as follows:
//   - Filename is not read by Build; the destination path is passed to Build
//     as an argument.
//   - Compression and IndexBlockSize are forwarded to buildHamletTestSST
//     unchanged.
//   - FullKeyFilter and PrefixFilter: if either is set, bloom.FilterPolicy(10)
//     is used as the filter policy; otherwise no filter policy is set.
//   - UseFixtureComparer selects fixtureComparer; otherwise a nil comparer is
//     passed.
//   - KeyCountPropertyCollector installs a keyCountPropertyCollector on the
//     writer.
type TestFixtureInfo struct {
	Filename                  string
	Compression               Compression
	FullKeyFilter             bool
	PrefixFilter              bool
	IndexBlockSize            int
	UseFixtureComparer        bool
	KeyCountPropertyCollector bool
}
   193  
   194  // TestFixtures contains all metadata necessary to generate the test SSTs.
   195  var TestFixtures = []TestFixtureInfo{
   196  	{
   197  		Filename:                  "h.sst",
   198  		Compression:               SnappyCompression,
   199  		FullKeyFilter:             false,
   200  		PrefixFilter:              false,
   201  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   202  		UseFixtureComparer:        false,
   203  		KeyCountPropertyCollector: true,
   204  	},
   205  	{
   206  		Filename:                  "h.no-compression.sst",
   207  		Compression:               NoCompression,
   208  		FullKeyFilter:             false,
   209  		PrefixFilter:              false,
   210  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   211  		UseFixtureComparer:        false,
   212  		KeyCountPropertyCollector: true,
   213  	},
   214  	{
   215  		Filename:                  "h.table-bloom.sst",
   216  		Compression:               SnappyCompression,
   217  		FullKeyFilter:             true,
   218  		PrefixFilter:              false,
   219  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   220  		UseFixtureComparer:        false,
   221  		KeyCountPropertyCollector: false,
   222  	},
   223  	{
   224  		Filename:                  "h.table-bloom.no-compression.sst",
   225  		Compression:               NoCompression,
   226  		FullKeyFilter:             true,
   227  		PrefixFilter:              false,
   228  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   229  		UseFixtureComparer:        false,
   230  		KeyCountPropertyCollector: false,
   231  	},
   232  	{
   233  		Filename:                  "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
   234  		Compression:               NoCompression,
   235  		FullKeyFilter:             false,
   236  		PrefixFilter:              true,
   237  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   238  		UseFixtureComparer:        true,
   239  		KeyCountPropertyCollector: false,
   240  	},
   241  	{
   242  		Filename:                  "h.no-compression.two_level_index.sst",
   243  		Compression:               NoCompression,
   244  		FullKeyFilter:             false,
   245  		PrefixFilter:              false,
   246  		IndexBlockSize:            fixtureSmallIndexBlockSize,
   247  		UseFixtureComparer:        false,
   248  		KeyCountPropertyCollector: true,
   249  	},
   250  	{
   251  		Filename:                  "h.zstd-compression.sst",
   252  		Compression:               ZstdCompression,
   253  		FullKeyFilter:             false,
   254  		PrefixFilter:              false,
   255  		IndexBlockSize:            fixtureDefaultIndexBlockSize,
   256  		UseFixtureComparer:        false,
   257  		KeyCountPropertyCollector: true,
   258  	},
   259  }
   260  
   261  // Build creates an sst file for the given fixture.
   262  func (tf TestFixtureInfo) Build(fs vfs.FS, filename string) error {
   263  	var fp base.FilterPolicy
   264  	if tf.FullKeyFilter || tf.PrefixFilter {
   265  		fp = bloom.FilterPolicy(10)
   266  	}
   267  	var comparer *Comparer
   268  	if tf.UseFixtureComparer {
   269  		comparer = fixtureComparer
   270  	}
   271  	var propCollector func() TablePropertyCollector
   272  	if tf.KeyCountPropertyCollector {
   273  		propCollector = func() TablePropertyCollector {
   274  			return &keyCountPropertyCollector{}
   275  		}
   276  	}
   277  
   278  	return buildHamletTestSST(
   279  		fs, filename, tf.Compression, fp, base.TableFilter,
   280  		comparer,
   281  		propCollector,
   282  		fixtureBlockSize,
   283  		tf.IndexBlockSize,
   284  	)
   285  }
   286  
   287  const fixtureDefaultIndexBlockSize = math.MaxInt32
   288  const fixtureSmallIndexBlockSize = 128
   289  const fixtureBlockSize = 2048
   290  const fixtureFormat = TableFormatPebblev1
   291  
   292  type keyCountPropertyCollector struct {
   293  	count int
   294  }
   295  
   296  func (c *keyCountPropertyCollector) Add(key InternalKey, value []byte) error {
   297  	c.count++
   298  	return nil
   299  }
   300  
   301  func (c *keyCountPropertyCollector) Finish(userProps map[string]string) error {
   302  	userProps["test.key-count"] = fmt.Sprint(c.count)
   303  	return nil
   304  }
   305  
   306  func (c *keyCountPropertyCollector) Name() string {
   307  	return "KeyCountPropertyCollector"
   308  }
   309  
   310  var fixtureComparer = func() *Comparer {
   311  	c := *base.DefaultComparer
   312  	// NB: this is named as such only to match the built-in RocksDB comparer.
   313  	c.Name = "leveldb.BytewiseComparator"
   314  	c.Split = func(a []byte) int {
   315  		// TODO(tbg): It's difficult to provide a more meaningful prefix extractor
   316  		// on the given dataset since it's not MVCC, and so it's impossible to come
   317  		// up with a sensible one. We need to add a better dataset and use that
   318  		// instead to get confidence that prefix extractors are working as intended.
   319  		return len(a)
   320  	}
   321  	return &c
   322  }()