// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

//go:build !js && !wasip1
// +build !js,!wasip1

// Package leveldb implements the key-value database layer based on LevelDB.
package leveldb

import (
	"bytes"
	"fmt"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/util"
)

const (
	// degradationWarnInterval specifies how often warning should be printed if the
	// leveldb database cannot keep up with requested writes.
	degradationWarnInterval = time.Minute

	// minCache is the minimum amount of memory in megabytes to allocate to leveldb
	// read and write caching, split half and half.
	minCache = 16

	// minHandles is the minimum number of files handles to allocate to the open
	// database files.
	minHandles = 16

	// metricsGatheringInterval specifies the interval to retrieve leveldb database
	// compaction, io and pause stats to report to the user.
	metricsGatheringInterval = 3 * time.Second
)

// Database is a persistent key-value store. Apart from basic data storage
// functionality it also supports batch writes and iterating over the keyspace in
// binary-alphabetical order.
type Database struct {
	fn string      // filename for reporting
	db *leveldb.DB // LevelDB instance

	compTimeMeter       *metrics.Meter // Meter for measuring the total time spent in database compaction
	compReadMeter       *metrics.Meter // Meter for measuring the data read during compaction
	compWriteMeter      *metrics.Meter // Meter for measuring the data written during compaction
	writeDelayNMeter    *metrics.Meter // Meter for measuring the write delay number due to database compaction
	writeDelayMeter     *metrics.Meter // Meter for measuring the write delay duration due to database compaction
	diskSizeGauge       *metrics.Gauge // Gauge for tracking the size of all the levels in the database
	diskReadMeter       *metrics.Meter // Meter for measuring the effective amount of data read
	diskWriteMeter      *metrics.Meter // Meter for measuring the effective amount of data written
	memCompGauge        *metrics.Gauge // Gauge for tracking the number of memory compaction
	level0CompGauge     *metrics.Gauge // Gauge for tracking the number of table compaction in level0
	nonlevel0CompGauge  *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
	seekCompGauge       *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
	manualMemAllocGauge *metrics.Gauge // Gauge to track the amount of memory that has been manually allocated (not a part of runtime/GC)

	levelsGauge []*metrics.Gauge // Gauge for tracking the number of tables in levels

	quitLock sync.Mutex      // Mutex protecting the quit channel access
	quitChan chan chan error // Quit channel to stop the metrics collection before closing the database

	log log.Logger // Contextual logger tracking the database path
}

// New returns a wrapped LevelDB object. The namespace is the prefix that the
// metrics reporting should use for surfacing internal stats.
func New(file string, cache int, handles int, namespace string, readonly bool) (*Database, error) {
	return NewCustom(file, namespace, func(options *opt.Options) {
		// Ensure we have some minimal caching and file guarantees
		if cache < minCache {
			cache = minCache
		}
		if handles < minHandles {
			handles = minHandles
		}
		// Set default options: half the cache for the block cache, a quarter
		// for the write buffer (leveldb keeps two write buffers internally).
		options.OpenFilesCacheCapacity = handles
		options.BlockCacheCapacity = cache / 2 * opt.MiB
		options.WriteBuffer = cache / 4 * opt.MiB // Two of these are used internally
		if readonly {
			options.ReadOnly = true
		}
	})
}

// NewCustom returns a wrapped LevelDB object. The namespace is the prefix that the
// metrics reporting should use for surfacing internal stats.
// The customize function allows the caller to modify the leveldb options.
func NewCustom(file string, namespace string, customize func(options *opt.Options)) (*Database, error) {
	options := configureOptions(customize)
	logger := log.New("database", file)
	usedCache := options.GetBlockCacheCapacity() + options.GetWriteBuffer()*2
	logCtx := []interface{}{"cache", common.StorageSize(usedCache), "handles", options.GetOpenFilesCacheCapacity()}
	if options.ReadOnly {
		logCtx = append(logCtx, "readonly", "true")
	}
	logger.Info("Allocated cache and file handles", logCtx...)

	// Open the db and recover any potential corruptions
	db, err := leveldb.OpenFile(file, options)
	if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
		// NOTE(review): recovery runs with nil options, so the customized
		// settings (including ReadOnly) are not applied here — confirm this
		// is intentional.
		db, err = leveldb.RecoverFile(file, nil)
	}
	if err != nil {
		return nil, err
	}
	// Assemble the wrapper with all the registered metrics
	ldb := &Database{
		fn:       file,
		db:       db,
		log:      logger,
		quitChan: make(chan chan error),
	}
	ldb.compTimeMeter = metrics.NewRegisteredMeter(namespace+"compact/time", nil)
	ldb.compReadMeter = metrics.NewRegisteredMeter(namespace+"compact/input", nil)
	ldb.compWriteMeter = metrics.NewRegisteredMeter(namespace+"compact/output", nil)
	ldb.diskSizeGauge = metrics.NewRegisteredGauge(namespace+"disk/size", nil)
	ldb.diskReadMeter = metrics.NewRegisteredMeter(namespace+"disk/read", nil)
	ldb.diskWriteMeter = metrics.NewRegisteredMeter(namespace+"disk/write", nil)
	ldb.writeDelayMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/duration", nil)
	ldb.writeDelayNMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/counter", nil)
	ldb.memCompGauge = metrics.NewRegisteredGauge(namespace+"compact/memory", nil)
	ldb.level0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/level0", nil)
	ldb.nonlevel0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/nonlevel0", nil)
	ldb.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil)
	ldb.manualMemAllocGauge = metrics.NewRegisteredGauge(namespace+"memory/manualalloc", nil)

	// Start up the metrics gathering and return
	go ldb.meter(metricsGatheringInterval, namespace)
	return ldb, nil
}

// configureOptions sets some default options, then runs the provided setter.
156 func configureOptions(customizeFn func(*opt.Options)) *opt.Options { 157 // Set default options 158 options := &opt.Options{ 159 Filter: filter.NewBloomFilter(10), 160 DisableSeeksCompaction: true, 161 } 162 // Allow caller to make custom modifications to the options 163 if customizeFn != nil { 164 customizeFn(options) 165 } 166 return options 167 } 168 169 // Close stops the metrics collection, flushes any pending data to disk and closes 170 // all io accesses to the underlying key-value store. 171 func (db *Database) Close() error { 172 db.quitLock.Lock() 173 defer db.quitLock.Unlock() 174 175 if db.quitChan != nil { 176 errc := make(chan error) 177 db.quitChan <- errc 178 if err := <-errc; err != nil { 179 db.log.Error("Metrics collection failed", "err", err) 180 } 181 db.quitChan = nil 182 } 183 return db.db.Close() 184 } 185 186 // Has retrieves if a key is present in the key-value store. 187 func (db *Database) Has(key []byte) (bool, error) { 188 return db.db.Has(key, nil) 189 } 190 191 // Get retrieves the given key if it's present in the key-value store. 192 func (db *Database) Get(key []byte) ([]byte, error) { 193 dat, err := db.db.Get(key, nil) 194 if err != nil { 195 return nil, err 196 } 197 return dat, nil 198 } 199 200 // Put inserts the given value into the key-value store. 201 func (db *Database) Put(key []byte, value []byte) error { 202 return db.db.Put(key, value, nil) 203 } 204 205 // Delete removes the key from the key-value store. 206 func (db *Database) Delete(key []byte) error { 207 return db.db.Delete(key, nil) 208 } 209 210 // DeleteRange deletes all of the keys (and values) in the range [start,end) 211 // (inclusive on start, exclusive on end). 212 // Note that this is a fallback implementation as leveldb does not natively 213 // support range deletion. It can be slow and therefore the number of deleted 214 // keys is limited in order to avoid blocking for a very long time. 
215 // ErrTooManyKeys is returned if the range has only been partially deleted. 216 // In this case the caller can repeat the call until it finally succeeds. 217 func (db *Database) DeleteRange(start, end []byte) error { 218 batch := db.NewBatch() 219 it := db.NewIterator(nil, start) 220 defer it.Release() 221 222 var count int 223 for it.Next() && (end == nil || bytes.Compare(end, it.Key()) > 0) { 224 count++ 225 if count > 10000 { // should not block for more than a second 226 if err := batch.Write(); err != nil { 227 return err 228 } 229 return ethdb.ErrTooManyKeys 230 } 231 if err := batch.Delete(it.Key()); err != nil { 232 return err 233 } 234 } 235 return batch.Write() 236 } 237 238 // NewBatch creates a write-only key-value store that buffers changes to its host 239 // database until a final write is called. 240 func (db *Database) NewBatch() ethdb.Batch { 241 return &batch{ 242 db: db.db, 243 b: new(leveldb.Batch), 244 } 245 } 246 247 // NewBatchWithSize creates a write-only database batch with pre-allocated buffer. 248 func (db *Database) NewBatchWithSize(size int) ethdb.Batch { 249 return &batch{ 250 db: db.db, 251 b: leveldb.MakeBatch(size), 252 } 253 } 254 255 // NewIterator creates a binary-alphabetical iterator over a subset 256 // of database content with a particular key prefix, starting at a particular 257 // initial key (or after, if it does not exist). 258 func (db *Database) NewIterator(prefix []byte, start []byte) ethdb.Iterator { 259 return db.db.NewIterator(bytesPrefixRange(prefix, start), nil) 260 } 261 262 // Stat returns the statistic data of the database. 
func (db *Database) Stat() (string, error) {
	// Pull a fresh stats snapshot from goleveldb.
	var stats leveldb.DBStats
	if err := db.db.Stats(&stats); err != nil {
		return "", err
	}
	var (
		message       string
		totalRead     int64
		totalWrite    int64
		totalSize     int64
		totalTables   int
		totalDuration time.Duration
	)
	if len(stats.LevelSizes) > 0 {
		// Render a per-level compaction table, skipping levels that have
		// neither tables nor recorded compaction time.
		message += " Level |   Tables   |    Size(MB)   |    Time(sec)  |    Read(MB)   |   Write(MB)\n" +
			"-------+------------+---------------+---------------+---------------+---------------\n"
		for level, size := range stats.LevelSizes {
			read := stats.LevelRead[level]
			write := stats.LevelWrite[level]
			duration := stats.LevelDurations[level]
			tables := stats.LevelTablesCounts[level]

			if tables == 0 && duration == 0 {
				continue
			}
			totalTables += tables
			totalSize += size
			totalRead += read
			totalWrite += write
			totalDuration += duration
			message += fmt.Sprintf(" %3d   | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
				level, tables, float64(size)/1048576.0, duration.Seconds(),
				float64(read)/1048576.0, float64(write)/1048576.0)
		}
		message += "-------+------------+---------------+---------------+---------------+---------------\n"
		message += fmt.Sprintf(" Total | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
			totalTables, float64(totalSize)/1048576.0, totalDuration.Seconds(),
			float64(totalRead)/1048576.0, float64(totalWrite)/1048576.0)
		message += "-------+------------+---------------+---------------+---------------+---------------\n\n"
	}
	// Append the global IO, cache, compaction and liveness counters.
	message += fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f\n", float64(stats.IORead)/1048576.0, float64(stats.IOWrite)/1048576.0)
	message += fmt.Sprintf("BlockCache(MB):%.5f FileCache:%d\n", float64(stats.BlockCacheSize)/1048576.0, stats.OpenedTablesCount)
	message += fmt.Sprintf("MemoryCompaction:%d Level0Compaction:%d NonLevel0Compaction:%d SeekCompaction:%d\n", stats.MemComp, stats.Level0Comp, stats.NonLevel0Comp, stats.SeekComp)
	message += fmt.Sprintf("WriteDelayCount:%d WriteDelayDuration:%s Paused:%t\n", stats.WriteDelayCount, common.PrettyDuration(stats.WriteDelayDuration), stats.WritePaused)
	message += fmt.Sprintf("Snapshots:%d Iterators:%d\n", stats.AliveSnapshots, stats.AliveIterators)
	return message, nil
}

// Compact flattens the underlying data store for the given key range. In essence,
// deleted and overwritten versions are discarded, and the data is rearranged to
// reduce the cost of operations needed to access them.
//
// A nil start is treated as a key before all keys in the data store; a nil limit
// is treated as a key after all keys in the data store. If both is nil then it
// will compact entire data store.
func (db *Database) Compact(start []byte, limit []byte) error {
	return db.db.CompactRange(util.Range{Start: start, Limit: limit})
}

// Path returns the path to the database directory.
func (db *Database) Path() string {
	return db.fn
}

// SyncKeyValue flushes all pending writes in the write-ahead-log to disk,
// ensuring data durability up to that point.
func (db *Database) SyncKeyValue() error {
	// In theory, the WAL (Write-Ahead Log) can be explicitly synchronized using
	// a write operation with SYNC=true. However, there is no dedicated key reserved
	// for this purpose, and even a nil key (key=nil) is considered a valid
	// database entry.
	//
	// In LevelDB, writes are blocked until the data is written to the WAL, meaning
	// recent writes won't be lost unless a power failure or system crash occurs.
	// Additionally, LevelDB is no longer the default database engine and is likely
	// only used by hash-mode archive nodes. Given this, the durability guarantees
	// without explicit sync are acceptable in the context of LevelDB.
	return nil
}

// meter periodically retrieves internal leveldb counters and reports them to
// the metrics subsystem. It runs until a quit request arrives on db.quitChan
// (sent by Close) or a stats read fails; either way it replies on the ack
// channel with the last error before returning.
func (db *Database) meter(refresh time.Duration, namespace string) {
	// Create the counters to store current and previous compaction values.
	// compactions[i%2] holds the current sample, compactions[(i-1)%2] the
	// previous one, so meters can be marked with the per-interval delta.
	compactions := make([][]int64, 2)
	for i := 0; i < 2; i++ {
		compactions[i] = make([]int64, 4)
	}
	// Create storages for states and warning log tracer.
	var (
		errc chan error
		merr error

		stats           leveldb.DBStats
		iostats         [2]int64
		delaystats      [2]int64
		lastWritePaused time.Time
	)
	timer := time.NewTimer(refresh)
	defer timer.Stop()

	// Iterate ad infinitum and collect the stats
	for i := 1; errc == nil && merr == nil; i++ {
		// Retrieve the database stats
		// Stats method resets buffers inside therefore it's okay to just pass the struct.
		err := db.db.Stats(&stats)
		if err != nil {
			db.log.Error("Failed to read database stats", "err", err)
			merr = err
			continue
		}
		// Iterate over all the leveldbTable rows, and accumulate the entries
		for j := 0; j < len(compactions[i%2]); j++ {
			compactions[i%2][j] = 0
		}
		compactions[i%2][0] = stats.LevelSizes.Sum()
		for _, t := range stats.LevelDurations {
			compactions[i%2][1] += t.Nanoseconds()
		}
		compactions[i%2][2] = stats.LevelRead.Sum()
		compactions[i%2][3] = stats.LevelWrite.Sum()
		// Update all the requested meters with the deltas since last sample
		db.diskSizeGauge.Update(compactions[i%2][0])
		db.compTimeMeter.Mark(compactions[i%2][1] - compactions[(i-1)%2][1])
		db.compReadMeter.Mark(compactions[i%2][2] - compactions[(i-1)%2][2])
		db.compWriteMeter.Mark(compactions[i%2][3] - compactions[(i-1)%2][3])
		var (
			delayN   = int64(stats.WriteDelayCount)
			duration = stats.WriteDelayDuration
			paused   = stats.WritePaused
		)
		db.writeDelayNMeter.Mark(delayN - delaystats[0])
		db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
		// If a warning that db is performing compaction has been displayed, any subsequent
		// warnings will be withheld for one minute not to overwhelm the user.
		if paused && delayN-delaystats[0] == 0 && duration.Nanoseconds()-delaystats[1] == 0 &&
			time.Now().After(lastWritePaused.Add(degradationWarnInterval)) {
			db.log.Warn("Database compacting, degraded performance")
			lastWritePaused = time.Now()
		}
		delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()

		var (
			nRead  = int64(stats.IORead)
			nWrite = int64(stats.IOWrite)
		)
		db.diskReadMeter.Mark(nRead - iostats[0])
		db.diskWriteMeter.Mark(nWrite - iostats[1])
		iostats[0], iostats[1] = nRead, nWrite

		db.memCompGauge.Update(int64(stats.MemComp))
		db.level0CompGauge.Update(int64(stats.Level0Comp))
		db.nonlevel0CompGauge.Update(int64(stats.NonLevel0Comp))
		db.seekCompGauge.Update(int64(stats.SeekComp))

		// NOTE: this loop variable i shadows the outer sample counter i.
		for i, tables := range stats.LevelTablesCounts {
			// Append metrics for additional layers
			if i >= len(db.levelsGauge) {
				db.levelsGauge = append(db.levelsGauge, metrics.NewRegisteredGauge(namespace+fmt.Sprintf("tables/level%v", i), nil))
			}
			db.levelsGauge[i].Update(int64(tables))
		}

		// Sleep a bit, then repeat the stats collection
		select {
		case errc = <-db.quitChan:
			// Quit requesting, stop hammering the database
		case <-timer.C:
			timer.Reset(refresh)
			// Timeout, gather a new set of stats
		}
	}

	// On a stats failure the loop exits without a quit request; still wait for
	// Close so the error can be delivered on the ack channel.
	if errc == nil {
		errc = <-db.quitChan
	}
	errc <- merr
}

// batch is a write-only leveldb batch that commits changes to its host database
// when Write is called. A batch cannot be used concurrently.
type batch struct {
	db   *leveldb.DB    // host database the batch commits into
	b    *leveldb.Batch // underlying leveldb batch buffering the operations
	size int            // running total of queued key/value bytes
}

// Put inserts the given value into the batch for later committing.
451 func (b *batch) Put(key, value []byte) error { 452 b.b.Put(key, value) 453 b.size += len(key) + len(value) 454 return nil 455 } 456 457 // Delete inserts the key removal into the batch for later committing. 458 func (b *batch) Delete(key []byte) error { 459 b.b.Delete(key) 460 b.size += len(key) 461 return nil 462 } 463 464 // DeleteRange removes all keys in the range [start, end) from the batch for 465 // later committing, inclusive on start, exclusive on end. 466 // 467 // Note that this is a fallback implementation as leveldb does not natively 468 // support range deletion in batches. It iterates through the database to find 469 // keys in the range and adds them to the batch for deletion. 470 func (b *batch) DeleteRange(start, end []byte) error { 471 // Create an iterator to scan through the keys in the range 472 slice := &util.Range{ 473 Start: start, // If nil, it represents the key before all keys 474 Limit: end, // If nil, it represents the key after all keys 475 } 476 it := b.db.NewIterator(slice, nil) 477 defer it.Release() 478 479 var count int 480 for it.Next() { 481 count++ 482 key := it.Key() 483 if count > 10000 { // should not block for more than a second 484 return ethdb.ErrTooManyKeys 485 } 486 // Add this key to the batch for deletion 487 b.b.Delete(key) 488 b.size += len(key) 489 } 490 if err := it.Error(); err != nil { 491 return err 492 } 493 return nil 494 } 495 496 // ValueSize retrieves the amount of data queued up for writing. 497 func (b *batch) ValueSize() int { 498 return b.size 499 } 500 501 // Write flushes any accumulated data to disk. 502 func (b *batch) Write() error { 503 return b.db.Write(b.b, nil) 504 } 505 506 // Reset resets the batch for reuse. 507 func (b *batch) Reset() { 508 b.b.Reset() 509 b.size = 0 510 } 511 512 // Replay replays the batch contents. 
513 func (b *batch) Replay(w ethdb.KeyValueWriter) error { 514 return b.b.Replay(&replayer{writer: w}) 515 } 516 517 // replayer is a small wrapper to implement the correct replay methods. 518 type replayer struct { 519 writer ethdb.KeyValueWriter 520 failure error 521 } 522 523 // Put inserts the given value into the key-value data store. 524 func (r *replayer) Put(key, value []byte) { 525 // If the replay already failed, stop executing ops 526 if r.failure != nil { 527 return 528 } 529 r.failure = r.writer.Put(key, value) 530 } 531 532 // Delete removes the key from the key-value data store. 533 func (r *replayer) Delete(key []byte) { 534 // If the replay already failed, stop executing ops 535 if r.failure != nil { 536 return 537 } 538 r.failure = r.writer.Delete(key) 539 } 540 541 // DeleteRange removes all keys in the range [start, end) from the key-value data store. 542 func (r *replayer) DeleteRange(start, end []byte) { 543 // If the replay already failed, stop executing ops 544 if r.failure != nil { 545 return 546 } 547 // Check if the writer also supports range deletion 548 if rangeDeleter, ok := r.writer.(ethdb.KeyValueRangeDeleter); ok { 549 r.failure = rangeDeleter.DeleteRange(start, end) 550 } else { 551 r.failure = fmt.Errorf("ethdb.KeyValueWriter does not implement DeleteRange") 552 } 553 } 554 555 // bytesPrefixRange returns key range that satisfy 556 // - the given prefix, and 557 // - the given seek position 558 func bytesPrefixRange(prefix, start []byte) *util.Range { 559 r := util.BytesPrefix(prefix) 560 r.Start = append(r.Start, start...) 561 return r 562 }