github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/testdata/make-table.cc (about)

     1  // Copyright 2011 The LevelDB-Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This program adds N lines from infile to a leveldb table at outfile.
     6  // The h.txt infile was generated via:
     7  // cat hamlet-act-1.txt | tr '[:upper:]' '[:lower:]' | grep -o -E '\w+' | sort | uniq -c > infile
     8  //
     9  // To build and run:
// g++ make-table.cc -lrocksdb && ./a.out
    11  
    12  #include <fstream>
    13  #include <iostream>
    14  #include <string>
    15  
    16  #include "rocksdb/env.h"
    17  #include "rocksdb/filter_policy.h"
    18  #include "rocksdb/slice_transform.h"
    19  #include "rocksdb/sst_file_writer.h"
    20  #include "rocksdb/table.h"
    21  
// Path of the input file read by write(). As described in the header comment,
// h.txt holds `uniq -c` output: one "<count> <word>" line per entry.
const char* infile = "h.txt";
    23  
    24  // A dummy prefix extractor that cuts off the last two bytes for keys of
    25  // length three or over. This is not a valid prefix extractor and barely
    26  // enough to do a little bit of unit testing.
    27  //
    28  // TODO(tbg): write some test infra using CockroachDB MVCC data.
    29  class PrefixExtractor : public rocksdb::SliceTransform {
    30   public:
    31    PrefixExtractor() {}
    32  
    33    virtual const char* Name() const { return "leveldb.BytewiseComparator"; }
    34  
    35    virtual rocksdb::Slice Transform(const rocksdb::Slice& src) const {
    36      auto sl = rocksdb::Slice(src.data(), src.size());
    37      return sl;
    38    }
    39  
    40    virtual bool InDomain(const rocksdb::Slice& src) const { return true; }
    41  };
    42  
    43  class KeyCountPropertyCollector : public rocksdb::TablePropertiesCollector {
    44   public:
    45    KeyCountPropertyCollector()
    46        : count_(0) {
    47    }
    48  
    49    rocksdb::Status AddUserKey(const rocksdb::Slice&, const rocksdb::Slice&,
    50                               rocksdb::EntryType type, rocksdb::SequenceNumber,
    51                               uint64_t) override {
    52      count_++;
    53      return rocksdb::Status::OK();
    54    }
    55  
    56    rocksdb::Status Finish(rocksdb::UserCollectedProperties* properties) override {
    57      char buf[16];
    58      sprintf(buf, "%d", count_);
    59      *properties = rocksdb::UserCollectedProperties{
    60        {"test.key-count", buf},
    61      };
    62      return rocksdb::Status::OK();
    63    }
    64  
    65    const char* Name() const override { return "KeyCountPropertyCollector"; }
    66  
    67    rocksdb::UserCollectedProperties GetReadableProperties() const override {
    68      return rocksdb::UserCollectedProperties{};
    69    }
    70  
    71   private:
    72    int count_;
    73  };
    74  
    75  class KeyCountPropertyCollectorFactory : public rocksdb::TablePropertiesCollectorFactory {
    76    virtual rocksdb::TablePropertiesCollector* CreateTablePropertiesCollector(
    77        rocksdb::TablePropertiesCollectorFactory::Context context) override {
    78      return new KeyCountPropertyCollector();
    79    }
    80    const char* Name() const override { return "KeyCountPropertyCollector"; }
    81  };
    82  
    83  int write() {
    84    for (int i = 0; i < 12; ++i) {
    85      rocksdb::Options options;
    86      rocksdb::BlockBasedTableOptions table_options;
    87      const char* outfile;
    88  
    89      table_options.block_size = 2048;
    90      table_options.index_shortening = rocksdb::BlockBasedTableOptions::IndexShorteningMode::kShortenSeparatorsAndSuccessor;
    91  
    92      switch (i) {
    93        case 0:
    94          outfile = "h.ldb";
    95          table_options.format_version = 0;
    96          table_options.whole_key_filtering = false;
    97          break;
    98  
    99        case 1:
   100          outfile = "h.sst";
   101          options.table_properties_collector_factories.emplace_back(
   102              new KeyCountPropertyCollectorFactory);
   103          table_options.whole_key_filtering = false;
   104          break;
   105  
   106        case 2:
   107          outfile = "h.no-compression.sst";
   108          options.table_properties_collector_factories.emplace_back(
   109              new KeyCountPropertyCollectorFactory);
   110          options.compression = rocksdb::kNoCompression;
   111          table_options.whole_key_filtering = false;
   112          break;
   113  
   114        case 3:
   115          outfile = "h.block-bloom.no-compression.sst";
   116          options.compression = rocksdb::kNoCompression;
   117          table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
   118          table_options.whole_key_filtering = true;
   119          break;
   120  
   121        case 4:
   122          outfile = "h.table-bloom.sst";
   123          table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
   124          table_options.whole_key_filtering = true;
   125          break;
   126  
   127        case 5:
   128          outfile = "h.table-bloom.no-compression.sst";
   129          options.compression = rocksdb::kNoCompression;
   130          table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
   131          table_options.whole_key_filtering = true;
   132          break;
   133  
   134        case 6:
   135          // TODO(peter): unused at this time
   136          //
   137          // outfile = "h.block-bloom.no-compression.prefix_extractor.sst";
   138          // options.compression = rocksdb::kNoCompression;
   139          // options.prefix_extractor.reset(new PrefixExtractor);
   140          // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
   141          // table_options.whole_key_filtering = true;
   142          // break;
   143          continue;
   144  
   145        case 7:
   146          // TODO(peter): unused at this time
   147          //
   148          // outfile = "h.table-bloom.no-compression.prefix_extractor.sst";
   149          // options.compression = rocksdb::kNoCompression;
   150          // options.prefix_extractor.reset(new PrefixExtractor);
   151          // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
   152          // table_options.whole_key_filtering = true;
   153          // break;
   154          continue;
   155  
   156        case 8:
   157          // TODO(peter): unused at this time
   158          //
   159          // outfile = "h.block-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst";
   160          // options.compression = rocksdb::kNoCompression;
   161          // options.prefix_extractor.reset(new PrefixExtractor);
   162          // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
   163          // table_options.whole_key_filtering = false;
   164          // break;
   165          continue;
   166  
   167        case 9:
   168          outfile = "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst";
   169          options.compression = rocksdb::kNoCompression;
   170          options.prefix_extractor.reset(new PrefixExtractor);
   171          table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
   172          table_options.whole_key_filtering = false;
   173          break;
   174  
   175        case 10:
   176          outfile = "h.no-compression.two_level_index.sst";
   177          options.table_properties_collector_factories.emplace_back(
   178              new KeyCountPropertyCollectorFactory);
   179          options.compression = rocksdb::kNoCompression;
   180          table_options.index_type = rocksdb::BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
   181          // Use small metadata_block_size to stress two_level_index.
   182          table_options.metadata_block_size = 128;
   183          table_options.whole_key_filtering = false;
   184          break;
   185  
   186        case 11:
   187          outfile = "h.zstd-compression.sst";
   188          options.table_properties_collector_factories.emplace_back(
   189              new KeyCountPropertyCollectorFactory);
   190          options.compression = rocksdb::kZSTD;
   191          table_options.whole_key_filtering = false;
   192          break;
   193  
   194        default:
   195          continue;
   196      }
   197  
   198      options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
   199  
   200      std::unique_ptr<rocksdb::SstFileWriter> tb(new rocksdb::SstFileWriter({}, options));
   201      rocksdb::Status status = tb->Open(outfile);
   202      if (!status.ok()) {
   203        std::cerr << "SstFileWriter::Open: " << status.ToString() << std::endl;
   204        return 1;
   205      }
   206  
   207      int rangeDelLength = 0;
   208      int rangeDelCounter = 0;
   209      std::ifstream in(infile);
   210      std::string s;
   211      std::string rangeDelStart;
   212      for (int i = 0; getline(in, s); i++) {
   213        std::string key(s, 8);
   214        std::string val(s, 0, 7);
   215        val = val.substr(1 + val.rfind(' '));
   216        tb->Put(key.c_str(), val.c_str());
   217        // Add range deletions of increasing length.
   218        if (i % 100 == 0) {
   219          rangeDelStart = key;
   220          rangeDelCounter = 0;
   221          rangeDelLength++;
   222        }
   223        rangeDelCounter++;
   224  
   225        if (rangeDelCounter == rangeDelLength) {
   226          tb->DeleteRange(rangeDelStart, key.c_str());
   227        }
   228      }
   229  
   230      rocksdb::ExternalSstFileInfo info;
   231      status = tb->Finish(&info);
   232      if (!status.ok()) {
   233        std::cerr << "TableBuilder::Finish: " << status.ToString() << std::endl;
   234        return 1;
   235      }
   236  
   237      std::cout << outfile << ": wrote " << info.num_entries << " entries, " << info.file_size << "b" << std::endl;
   238    }
   239    return 0;
   240  }
   241  
   242  int main(int argc, char** argv) {
   243    return write();
   244  
   245  }