github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/testdata/make-table.cc

// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This program adds N lines from infile to a RocksDB sstable at outfile.
// The h.txt infile was generated via:
// cat hamlet-act-1.txt | tr '[:upper:]' '[:lower:]' | grep -o -E '\w+' | sort | uniq -c > infile
//
// To build and run:
// g++ make-table.cc -lrocksdb && ./a.out

#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>

#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/table.h"

const char* infile = "h.txt";

// A dummy prefix extractor that cuts off the last two bytes for keys of
// length three or over. This is not a valid prefix extractor and barely
// enough to do a little bit of unit testing.
//
// TODO(tbg): write some test infra using CockroachDB MVCC data.
class PrefixExtractor : public rocksdb::SliceTransform {
 public:
  PrefixExtractor() {}

  virtual const char* Name() const { return "leveldb.BytewiseComparator"; }

  virtual rocksdb::Slice Transform(const rocksdb::Slice& src) const {
    auto sl = rocksdb::Slice(src.data(), src.size());
    return sl;
  }

  virtual bool InDomain(const rocksdb::Slice& src) const { return true; }
};

class KeyCountPropertyCollector : public rocksdb::TablePropertiesCollector {
 public:
  KeyCountPropertyCollector()
      : count_(0) {
  }

  rocksdb::Status AddUserKey(const rocksdb::Slice&, const rocksdb::Slice&,
                             rocksdb::EntryType type, rocksdb::SequenceNumber,
                             uint64_t) override {
    count_++;
    return rocksdb::Status::OK();
  }

  rocksdb::Status Finish(rocksdb::UserCollectedProperties* properties) override {
    char buf[16];
    sprintf(buf, "%d", count_);
    *properties = rocksdb::UserCollectedProperties{
        {"test.key-count", buf},
    };
    return rocksdb::Status::OK();
  }

  const char* Name() const override { return "KeyCountPropertyCollector"; }

  rocksdb::UserCollectedProperties GetReadableProperties() const override {
    return rocksdb::UserCollectedProperties{};
  }

 private:
  int count_;
};

class KeyCountPropertyCollectorFactory : public rocksdb::TablePropertiesCollectorFactory {
  virtual rocksdb::TablePropertiesCollector* CreateTablePropertiesCollector(
      rocksdb::TablePropertiesCollectorFactory::Context context) override {
    return new KeyCountPropertyCollector();
  }
  const char* Name() const override { return "KeyCountPropertyCollector"; }
};

int write() {
  for (int i = 0; i < 12; ++i) {
    rocksdb::Options options;
    rocksdb::BlockBasedTableOptions table_options;
    const char* outfile;

    table_options.block_size = 2048;
    table_options.index_shortening = rocksdb::BlockBasedTableOptions::IndexShorteningMode::kShortenSeparatorsAndSuccessor;

    switch (i) {
      case 0:
        outfile = "h.ldb";
        table_options.format_version = 0;
        table_options.whole_key_filtering = false;
        break;

      case 1:
        outfile = "h.sst";
        options.table_properties_collector_factories.emplace_back(
            new KeyCountPropertyCollectorFactory);
        table_options.whole_key_filtering = false;
        break;

      case 2:
        outfile = "h.no-compression.sst";
        options.table_properties_collector_factories.emplace_back(
            new KeyCountPropertyCollectorFactory);
        options.compression = rocksdb::kNoCompression;
        table_options.whole_key_filtering = false;
        break;

      case 3:
        outfile = "h.block-bloom.no-compression.sst";
        options.compression = rocksdb::kNoCompression;
        table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
        table_options.whole_key_filtering = true;
        break;

      case 4:
        outfile = "h.table-bloom.sst";
        table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
        table_options.whole_key_filtering = true;
        break;

      case 5:
        outfile = "h.table-bloom.no-compression.sst";
        options.compression = rocksdb::kNoCompression;
        table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
        table_options.whole_key_filtering = true;
        break;

      case 6:
        // TODO(peter): unused at this time
        //
        // outfile = "h.block-bloom.no-compression.prefix_extractor.sst";
        // options.compression = rocksdb::kNoCompression;
        // options.prefix_extractor.reset(new PrefixExtractor);
        // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
        // table_options.whole_key_filtering = true;
        // break;
        continue;

      case 7:
        // TODO(peter): unused at this time
        //
        // outfile = "h.table-bloom.no-compression.prefix_extractor.sst";
        // options.compression = rocksdb::kNoCompression;
        // options.prefix_extractor.reset(new PrefixExtractor);
        // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
        // table_options.whole_key_filtering = true;
        // break;
        continue;

      case 8:
        // TODO(peter): unused at this time
        //
        // outfile = "h.block-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst";
        // options.compression = rocksdb::kNoCompression;
        // options.prefix_extractor.reset(new PrefixExtractor);
        // table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
        // table_options.whole_key_filtering = false;
        // break;
        continue;

      case 9:
        outfile = "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst";
        options.compression = rocksdb::kNoCompression;
        options.prefix_extractor.reset(new PrefixExtractor);
        table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
        table_options.whole_key_filtering = false;
        break;

      case 10:
        outfile = "h.no-compression.two_level_index.sst";
        options.table_properties_collector_factories.emplace_back(
            new KeyCountPropertyCollectorFactory);
        options.compression = rocksdb::kNoCompression;
        table_options.index_type = rocksdb::BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
        // Use small metadata_block_size to stress two_level_index.
        table_options.metadata_block_size = 128;
        table_options.whole_key_filtering = false;
        break;

      case 11:
        outfile = "h.zstd-compression.sst";
        options.table_properties_collector_factories.emplace_back(
            new KeyCountPropertyCollectorFactory);
        options.compression = rocksdb::kZSTD;
        table_options.whole_key_filtering = false;
        break;

      default:
        continue;
    }

    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));

    std::unique_ptr<rocksdb::SstFileWriter> tb(new rocksdb::SstFileWriter({}, options));
    rocksdb::Status status = tb->Open(outfile);
    if (!status.ok()) {
      std::cerr << "SstFileWriter::Open: " << status.ToString() << std::endl;
      return 1;
    }

    int rangeDelLength = 0;
    int rangeDelCounter = 0;
    std::ifstream in(infile);
    std::string s;
    std::string rangeDelStart;
    for (int i = 0; getline(in, s); i++) {
      // Each input line is `uniq -c` output ("<count padded to width 7> <word>"):
      // the word (from index 8 onward) becomes the key and the count becomes
      // the value.
      std::string key(s, 8);
      std::string val(s, 0, 7);
      val = val.substr(1 + val.rfind(' '));
      tb->Put(key.c_str(), val.c_str());
      // Add range deletions of increasing length.
      if (i % 100 == 0) {
        rangeDelStart = key;
        rangeDelCounter = 0;
        rangeDelLength++;
      }
      rangeDelCounter++;

      if (rangeDelCounter == rangeDelLength) {
        tb->DeleteRange(rangeDelStart, key.c_str());
      }
    }

    rocksdb::ExternalSstFileInfo info;
    status = tb->Finish(&info);
    if (!status.ok()) {
      std::cerr << "SstFileWriter::Finish: " << status.ToString() << std::endl;
      return 1;
    }

    std::cout << outfile << ": wrote " << info.num_entries << " entries, " << info.file_size << "b" << std::endl;
  }
  return 0;
}

int main(int argc, char** argv) {
  return write();
}
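
// What follows is a minimal sketch (not part of the generator above) of how one
// of the emitted sstables could be inspected to confirm the "test.key-count"
// property written by KeyCountPropertyCollector. It assumes a RocksDB build
// that ships rocksdb/sst_file_reader.h; the verify() helper name is
// illustrative only. It is kept commented out so this file still compiles as a
// standalone program.
//
//   #include "rocksdb/sst_file_reader.h"
//
//   int verify(const char* path) {
//     rocksdb::Options options;
//     rocksdb::SstFileReader reader(options);
//     rocksdb::Status s = reader.Open(path);
//     if (!s.ok()) {
//       std::cerr << "SstFileReader::Open: " << s.ToString() << std::endl;
//       return 1;
//     }
//     // Table properties include any user-collected properties, such as
//     // test.key-count from the collector above.
//     auto props = reader.GetTableProperties();
//     auto it = props->user_collected_properties.find("test.key-count");
//     if (it != props->user_collected_properties.end()) {
//       std::cout << path << ": test.key-count=" << it->second << std::endl;
//     }
//     return 0;
//   }
//
// For example, verify("h.sst") would print the count recorded by the collector
// for that table.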