github.com/klaytn/klaytn@v1.12.1/storage/database/rocksdb_database.go

// Copyright 2023 The klaytn Authors
// This file is part of the klaytn library.
//
// The klaytn library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The klaytn library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the klaytn library. If not, see <http://www.gnu.org/licenses/>.

//go:build rocksdb
// +build rocksdb

package database

import (
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/klaytn/klaytn/log"
	klaytnmetrics "github.com/klaytn/klaytn/metrics"
	metricutils "github.com/klaytn/klaytn/metrics/utils"
	"github.com/linxGnu/grocksdb"
	"github.com/rcrowley/go-metrics"
)

func filterPolicyStrToNative(t string) *grocksdb.NativeFilterPolicy {
	switch t {
	case "bloom":
		return grocksdb.NewBloomFilter(defaultBitsPerKey)
	case "ribbon":
		return grocksdb.NewRibbonFilterPolicy(defaultBitsPerKey)
	default:
		return nil
	}
}

func compressionStrToType(t string) grocksdb.CompressionType {
	switch t {
	case "snappy":
		return grocksdb.SnappyCompression
	case "zlib":
		return grocksdb.ZLibCompression
	case "bz2":
		return grocksdb.Bz2Compression
	case "lz4":
		return grocksdb.LZ4Compression
	case "lz4hc":
		return grocksdb.LZ4HCCompression
	case "xpress":
		return grocksdb.XpressCompression
	case "zstd":
		return grocksdb.ZSTDCompression
	default:
		return grocksdb.NoCompression
	}
}

type rocksDB struct {
	config *RocksDBConfig
	db     *grocksdb.DB // rocksDB instance

	wo *grocksdb.WriteOptions
	ro *grocksdb.ReadOptions

	quitCh          chan struct{}
	metrics         []metrics.Meter
	getTimer        klaytnmetrics.HybridTimer
	putTimer        klaytnmetrics.HybridTimer
	batchWriteTimer klaytnmetrics.HybridTimer

	prefix string
	logger log.Logger
}

// openFile checks that the given path is a valid directory. If the path does
// not exist and needToMake is true, the directory is created.
func openFile(path string, needToMake bool) error {
	if fi, err := os.Stat(path); err == nil {
		if !fi.IsDir() {
			return fmt.Errorf("rocksdb: open %s: not a directory", path)
		}
	} else if os.IsNotExist(err) && needToMake {
		if err := os.MkdirAll(path, 0o755); err != nil {
			return err
		}
	} else {
		return err
	}

	return nil
}
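
// NewRocksDB opens (or creates) a RocksDB instance at the given path. When
// config.Secondary is true, the database is opened as a read-only secondary
// instance that can follow a primary via TryCatchUpWithPrimary; in that mode
// the data directory is expected to already exist.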
func NewRocksDB(path string, config *RocksDBConfig) (*rocksDB, error) {
	localLogger := logger.NewWith("path", path)

	if err := openFile(path, !config.Secondary); err != nil {
		return nil, err
	}

	// Ensure we have some minimal caching and file guarantees
	if config.CacheSize < minCacheSizeForRocksDB {
		logger.Warn("Cache size too small, increasing to minimum recommended", "oldCacheSize", config.CacheSize, "newCacheSize", minCacheSizeForRocksDB)
		config.CacheSize = minCacheSizeForRocksDB
	}
	if config.MaxOpenFiles < minOpenFilesForRocksDB {
		logger.Warn("Max open files too small, increasing to minimum recommended", "oldMaxOpenFiles", config.MaxOpenFiles, "newMaxOpenFiles", minOpenFilesForRocksDB)
		config.MaxOpenFiles = minOpenFilesForRocksDB
	}

	blockCacheSize := config.CacheSize / 2 * 1024 * 1024 // half of CacheSize, converted from MiB to bytes
	bufferSize := config.CacheSize / 2 * 1024 * 1024     // half of CacheSize, converted from MiB to bytes

	bbto := grocksdb.NewDefaultBlockBasedTableOptions()
	bbto.SetBlockCache(grocksdb.NewLRUCache(blockCacheSize))
	if config.CacheIndexAndFilter {
		bbto.SetCacheIndexAndFilterBlocks(true)
		bbto.SetPinL0FilterAndIndexBlocksInCache(true)
	}

	policy := filterPolicyStrToNative(config.FilterPolicy)
	if policy != nil {
		bbto.SetFilterPolicy(policy)
		bbto.SetOptimizeFiltersForMemory(true)
	}

	opts := grocksdb.NewDefaultOptions()
	opts.SetBlockBasedTableFactory(bbto)
	opts.SetCreateIfMissing(true)
	opts.SetWriteBufferSize(bufferSize)
	opts.SetDumpMallocStats(config.DumpMallocStat)
	opts.SetCompression(compressionStrToType(config.CompressionType))
	opts.SetBottommostCompression(compressionStrToType(config.BottommostCompressionType))
	opts.SetMaxOpenFiles(config.MaxOpenFiles)

	logger.Info("RocksDB configuration", "blockCacheSize", blockCacheSize, "bufferSize", bufferSize, "enableDumpMallocStat", config.DumpMallocStat, "compressionType", config.CompressionType, "bottommostCompressionType", config.BottommostCompressionType, "filterPolicy", config.FilterPolicy, "disableMetrics", config.DisableMetrics, "maxOpenFiles", config.MaxOpenFiles, "cacheIndexAndFilter", config.CacheIndexAndFilter)

	var (
		db  *grocksdb.DB
		err error
	)

	if config.Secondary {
		db, err = grocksdb.OpenDbAsSecondary(opts, path, path)
	} else {
		db, err = grocksdb.OpenDb(opts, path)
	}
	if err != nil {
		return nil, err
	}
	return &rocksDB{
		config: config,
		db:     db,
		wo:     grocksdb.NewDefaultWriteOptions(),
		ro:     grocksdb.NewDefaultReadOptions(),
		logger: localLogger,
		quitCh: make(chan struct{}),
	}, nil
}

func (db *rocksDB) Type() DBType {
	return RocksDB
}
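
// Put inserts the given key/value pair into the database. Writes are silently
// ignored when the database is opened as a secondary (read-only) instance.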
func (db *rocksDB) Put(key []byte, value []byte) error {
	if db.config.Secondary {
		return nil
	}
	if !db.config.DisableMetrics {
		start := time.Now()
		defer db.putTimer.Update(time.Since(start))
	}
	return db.db.Put(db.wo, key, value)
}

// Has reports whether a value for the given key exists in the database.
func (db *rocksDB) Has(key []byte) (bool, error) {
	dat, err := db.db.GetBytes(db.ro, key)
	if dat == nil || err != nil {
		return false, err
	}

	return true, nil
}

// Get retrieves the value for the given key if it is present in the database.
func (db *rocksDB) Get(key []byte) ([]byte, error) {
	if !db.config.DisableMetrics {
		start := time.Now()
		defer db.getTimer.Update(time.Since(start))
	}
	return db.get(key)
}

func (db *rocksDB) get(key []byte) ([]byte, error) {
	// Check the error first so that a real read failure is not masked as a
	// missing key.
	dat, err := db.db.GetBytes(db.ro, key)
	if err != nil {
		return nil, err
	}

	if dat == nil {
		return nil, dataNotFoundErr
	}
	return dat, nil
}

// Delete removes the key from the database. Deletes are silently ignored when
// the database is opened as a secondary (read-only) instance.
func (db *rocksDB) Delete(key []byte) error {
	if db.config.Secondary {
		return nil
	}
	return db.db.Delete(db.wo, key)
}

// TryCatchUpWithPrimary makes a secondary instance catch up with the current
// state of the primary instance it follows.
func (db *rocksDB) TryCatchUpWithPrimary() error {
	return db.db.TryCatchUpWithPrimary()
}

type rdbIter struct {
	first  bool
	iter   *grocksdb.Iterator
	prefix []byte
	db     *rocksDB
}

// Next moves the iterator to the next key/value pair. It returns false when
// the iterator is exhausted.
func (i *rdbIter) Next() bool {
	if i.first {
		i.first = false
	} else {
		i.iter.Next()
	}
	return i.iter.ValidForPrefix(i.prefix)
}

// Error returns any accumulated error. Exhausting all the key/value pairs
// is not considered to be an error.
func (i *rdbIter) Error() error {
	if i.first {
		return nil
	}
	return i.iter.Err()
}

// Key returns the key of the current key/value pair, or nil if done. The caller
// should not modify the contents of the returned slice, and its contents may
// change on the next call to Next.
func (i *rdbIter) Key() []byte {
	if i.first {
		return nil
	}
	key := i.iter.Key()
	defer key.Free()
	return key.Data()
}

// Value returns the value of the current key/value pair, or nil if done. The
// caller should not modify the contents of the returned slice, and its contents
// may change on the next call to Next.
func (i *rdbIter) Value() []byte {
	if i.first {
		return nil
	}
	val := i.iter.Value()
	defer val.Free()
	return val.Data()
}

// Release releases associated resources. Release should always succeed and can
// be called multiple times without causing error.
func (i *rdbIter) Release() {
	i.iter.Close()
}

// NewIterator creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix, starting at a particular
// initial key (or after, if it does not exist).
func (db *rocksDB) NewIterator(prefix []byte, start []byte) Iterator {
	iter := db.db.NewIterator(db.ro)
	// Copy the prefix before appending so the caller's slice cannot be
	// clobbered when it has spare capacity.
	firstKey := append(append([]byte{}, prefix...), start...)
	iter.Seek(firstKey)
	return &rdbIter{first: true, iter: iter, prefix: prefix, db: db}
}
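
// Close stops background work on the database and releases the native handles
// held by the instance: the database itself and its default read/write options.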
func (db *rocksDB) Close() {
	close(db.quitCh)
	db.db.CancelAllBackgroundWork(true)
	db.db.Close()
	db.wo.Destroy()
	db.ro.Destroy()
	db.logger.Info("RocksDB is closed")
}

func (db *rocksDB) updateMeter(name string, meter metrics.Meter) {
	v, s := db.db.GetIntProperty(name)
	if s {
		meter.Mark(int64(v))
	}
}

// Meter configures the database metrics collectors and, unless metrics are
// disabled, starts a goroutine that periodically reports RocksDB properties.
func (db *rocksDB) Meter(prefix string) {
	db.prefix = prefix

	for _, property := range properties {
		parts := strings.Split(property, ".")
		name := strings.ReplaceAll(parts[1], "-", "/")
		db.metrics = append(db.metrics, metrics.NewRegisteredMeter(prefix+name, nil))
	}
	db.getTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"get/time", nil)
	db.putTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"put/time", nil)
	db.batchWriteTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"batchwrite/time", nil)

	// Short circuit metering if the metrics system is disabled.
	// The meters above are initialized as NilMeters when metricutils.Enabled == false.
	if !metricutils.Enabled || db.config.DisableMetrics {
		return
	}

	go db.meter(3 * time.Second)
}

func (db *rocksDB) meter(t time.Duration) {
	ticker := time.NewTicker(t)
	defer ticker.Stop()

	for {
		select {
		case <-db.quitCh:
			return
		case <-ticker.C:
			for idx, property := range properties {
				db.updateMeter(property, db.metrics[idx])
			}
		}
	}
}

func (db *rocksDB) NewBatch() Batch {
	return &rdbBatch{b: grocksdb.NewWriteBatch(), db: db}
}

// rdbBatch is a write-only rocksdb batch that commits changes to its host database
// when Write is called. A batch cannot be used concurrently.
type rdbBatch struct {
	b    *grocksdb.WriteBatch
	db   *rocksDB
	size int
}

// Put inserts the given value into the batch for later committing.
func (b *rdbBatch) Put(key, value []byte) error {
	if b.db.config.Secondary {
		return nil
	}
	b.b.Put(key, value)
	b.size += len(value)
	return nil
}

// Delete inserts a key removal into the batch for later committing.
func (b *rdbBatch) Delete(key []byte) error {
	if b.db.config.Secondary {
		return nil
	}
	b.b.Delete(key)
	b.size++
	return nil
}

// Write flushes any accumulated data to disk.
func (b *rdbBatch) Write() error {
	if b.db.config.Secondary {
		return nil
	}
	if !b.db.config.DisableMetrics {
		start := time.Now()
		defer b.db.batchWriteTimer.Update(time.Since(start))
	}
	return b.write()
}

func (b *rdbBatch) write() error {
	return b.db.db.Write(b.db.wo, b.b)
}

// ValueSize retrieves the amount of data queued up for writing.
func (b *rdbBatch) ValueSize() int {
	return b.size
}

// Reset resets the batch for reuse.
func (b *rdbBatch) Reset() {
	b.b.Clear()
	b.size = 0
}

// Release frees the memory allocated to the rocksdb batch object.
func (b *rdbBatch) Release() {
	b.b.Destroy()
}

// Replay replays the batch contents.
func (b *rdbBatch) Replay(w KeyValueWriter) error {
	b.db.logger.Crit("rocksdb batch does not implement Replay method")
	return nil
}
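
// rocksDBUsageSketch is an illustrative sketch, not part of the upstream file:
// it shows how the pieces above fit together, assuming the rocksdb build tag
// and only the RocksDBConfig fields referenced in this file. The path and
// field values below are arbitrary examples.
func rocksDBUsageSketch() error {
	config := &RocksDBConfig{
		CacheSize:      512,  // MiB; NewRocksDB raises this to the package minimum if too small
		MaxOpenFiles:   1024, // likewise raised to the package minimum if too small
		DisableMetrics: true, // timers are only registered by Meter(); disabled to keep the sketch self-contained
	}
	db, err := NewRocksDB("/tmp/rocksdb-example", config)
	if err != nil {
		return err
	}
	defer db.Close()

	// Single writes go straight through the default write options.
	if err := db.Put([]byte("key-1"), []byte("value-1")); err != nil {
		return err
	}
	if _, err := db.Get([]byte("key-1")); err != nil {
		return err
	}

	// Batched writes accumulate in memory and are committed atomically on Write.
	batch := db.NewBatch()
	defer batch.Release()
	if err := batch.Put([]byte("key-2"), []byte("value-2")); err != nil {
		return err
	}
	if err := batch.Write(); err != nil {
		return err
	}

	// Iterate all keys sharing the "key-" prefix, starting from the beginning.
	it := db.NewIterator([]byte("key-"), nil)
	defer it.Release()
	for it.Next() {
		_, _ = it.Key(), it.Value()
	}
	return it.Error()
}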