github.com/ethereum/go-ethereum@v1.14.3/ethdb/leveldb/leveldb.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

//go:build !js
// +build !js

// Package leveldb implements the key-value database layer based on LevelDB.
package leveldb

import (
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/util"
)

const (
	// degradationWarnInterval specifies how often a warning should be printed
	// if the leveldb database cannot keep up with the requested writes.
	degradationWarnInterval = time.Minute

	// minCache is the minimum amount of memory in megabytes to allocate to
	// leveldb read and write caching, split half and half.
	minCache = 16

	// minHandles is the minimum number of file handles to allocate to the
	// open database files.
	minHandles = 16

	// metricsGatheringInterval specifies the interval to retrieve leveldb
	// database compaction, io and pause stats to report to the user.
	metricsGatheringInterval = 3 * time.Second
)

// Database is a persistent key-value store. Apart from basic data storage
// functionality it also supports batch writes and iterating over the keyspace
// in binary-alphabetical order.
type Database struct {
	fn string      // filename for reporting
	db *leveldb.DB // LevelDB instance

	compTimeMeter       metrics.Meter // Meter for measuring the total time spent in database compaction
	compReadMeter       metrics.Meter // Meter for measuring the data read during compaction
	compWriteMeter      metrics.Meter // Meter for measuring the data written during compaction
	writeDelayNMeter    metrics.Meter // Meter for measuring the write delay number due to database compaction
	writeDelayMeter     metrics.Meter // Meter for measuring the write delay duration due to database compaction
	diskSizeGauge       metrics.Gauge // Gauge for tracking the size of all the levels in the database
	diskReadMeter       metrics.Meter // Meter for measuring the effective amount of data read
	diskWriteMeter      metrics.Meter // Meter for measuring the effective amount of data written
	memCompGauge        metrics.Gauge // Gauge for tracking the number of memory compactions
	level0CompGauge     metrics.Gauge // Gauge for tracking the number of table compactions in level0
	nonlevel0CompGauge  metrics.Gauge // Gauge for tracking the number of table compactions in non-level0 levels
	seekCompGauge       metrics.Gauge // Gauge for tracking the number of table compactions caused by read optimization
	manualMemAllocGauge metrics.Gauge // Gauge for tracking the amount of memory that has been manually allocated (not a part of runtime/GC)

	levelsGauge []metrics.Gauge // Gauges for tracking the number of tables in each level

	quitLock sync.Mutex      // Mutex protecting the quit channel access
	quitChan chan chan error // Quit channel to stop the metrics collection before closing the database

	log log.Logger // Contextual logger tracking the database path
}

// New returns a wrapped LevelDB object. The namespace is the prefix that the
// metrics reporting should use for surfacing internal stats.
func New(file string, cache int, handles int, namespace string, readonly bool) (*Database, error) {
	return NewCustom(file, namespace, func(options *opt.Options) {
		// Ensure we have some minimal caching and file guarantees
		if cache < minCache {
			cache = minCache
		}
		if handles < minHandles {
			handles = minHandles
		}
		// Set default options
		options.OpenFilesCacheCapacity = handles
		options.BlockCacheCapacity = cache / 2 * opt.MiB
		options.WriteBuffer = cache / 4 * opt.MiB // Two of these are used internally
		if readonly {
			options.ReadOnly = true
		}
	})
}
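// exampleOpen is an illustrative sketch, not part of the original file: it
// shows a plausible open/use/close life cycle for a database created with New.
// The path, cache size (in megabytes), handle count and metrics namespace are
// hypothetical values, not package defaults.
func exampleOpen() error {
	db, err := New("/tmp/example-chaindata", 128, 512, "example/db/", false)
	if err != nil {
		return err
	}
	defer db.Close()

	// Individual writes and reads go straight to the underlying store.
	if err := db.Put([]byte("key"), []byte("value")); err != nil {
		return err
	}
	_, err = db.Get([]byte("key"))
	return err
}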
// NewCustom returns a wrapped LevelDB object. The namespace is the prefix that
// the metrics reporting should use for surfacing internal stats. The customize
// function allows the caller to modify the leveldb options.
func NewCustom(file string, namespace string, customize func(options *opt.Options)) (*Database, error) {
	options := configureOptions(customize)
	logger := log.New("database", file)
	usedCache := options.GetBlockCacheCapacity() + options.GetWriteBuffer()*2
	logCtx := []interface{}{"cache", common.StorageSize(usedCache), "handles", options.GetOpenFilesCacheCapacity()}
	if options.ReadOnly {
		logCtx = append(logCtx, "readonly", "true")
	}
	logger.Info("Allocated cache and file handles", logCtx...)

	// Open the db and recover any potential corruptions
	db, err := leveldb.OpenFile(file, options)
	if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
		db, err = leveldb.RecoverFile(file, nil)
	}
	if err != nil {
		return nil, err
	}
	// Assemble the wrapper with all the registered metrics
	ldb := &Database{
		fn:       file,
		db:       db,
		log:      logger,
		quitChan: make(chan chan error),
	}
	ldb.compTimeMeter = metrics.NewRegisteredMeter(namespace+"compact/time", nil)
	ldb.compReadMeter = metrics.NewRegisteredMeter(namespace+"compact/input", nil)
	ldb.compWriteMeter = metrics.NewRegisteredMeter(namespace+"compact/output", nil)
	ldb.diskSizeGauge = metrics.NewRegisteredGauge(namespace+"disk/size", nil)
	ldb.diskReadMeter = metrics.NewRegisteredMeter(namespace+"disk/read", nil)
	ldb.diskWriteMeter = metrics.NewRegisteredMeter(namespace+"disk/write", nil)
	ldb.writeDelayMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/duration", nil)
	ldb.writeDelayNMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/counter", nil)
	ldb.memCompGauge = metrics.NewRegisteredGauge(namespace+"compact/memory", nil)
	ldb.level0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/level0", nil)
	ldb.nonlevel0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/nonlevel0", nil)
	ldb.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil)
	ldb.manualMemAllocGauge = metrics.NewRegisteredGauge(namespace+"memory/manualalloc", nil)

	// Start up the metrics gathering and return
	go ldb.meter(metricsGatheringInterval, namespace)
	return ldb, nil
}

// configureOptions sets some default options, then runs the provided setter.
func configureOptions(customizeFn func(*opt.Options)) *opt.Options {
	// Set default options
	options := &opt.Options{
		Filter:                 filter.NewBloomFilter(10),
		DisableSeeksCompaction: true,
	}
	// Allow the caller to make custom modifications to the options
	if customizeFn != nil {
		customizeFn(options)
	}
	return options
}

// Close stops the metrics collection, flushes any pending data to disk and
// closes all io accesses to the underlying key-value store.
func (db *Database) Close() error {
	db.quitLock.Lock()
	defer db.quitLock.Unlock()

	if db.quitChan != nil {
		errc := make(chan error)
		db.quitChan <- errc
		if err := <-errc; err != nil {
			db.log.Error("Metrics collection failed", "err", err)
		}
		db.quitChan = nil
	}
	return db.db.Close()
}

// Has reports whether a key is present in the key-value store.
func (db *Database) Has(key []byte) (bool, error) {
	return db.db.Has(key, nil)
}

// Get retrieves the given key if it's present in the key-value store.
func (db *Database) Get(key []byte) ([]byte, error) {
	dat, err := db.db.Get(key, nil)
	if err != nil {
		return nil, err
	}
	return dat, nil
}

// Put inserts the given value into the key-value store.
func (db *Database) Put(key []byte, value []byte) error {
	return db.db.Put(key, value, nil)
}

// Delete removes the key from the key-value store.
func (db *Database) Delete(key []byte) error {
	return db.db.Delete(key, nil)
}
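// exampleCustomOpen is an illustrative sketch, not part of the original file:
// it opens a database through NewCustom with caller-tuned options, mirroring
// what New does internally. The path, namespace and cache size are
// hypothetical choices.
func exampleCustomOpen() (*Database, error) {
	return NewCustom("/tmp/example-chaindata", "example/db/", func(options *opt.Options) {
		options.BlockCacheCapacity = 64 * opt.MiB
		options.OpenFilesCacheCapacity = minHandles
	})
}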
// NewBatch creates a write-only key-value store that buffers changes to its
// host database until a final write is called.
func (db *Database) NewBatch() ethdb.Batch {
	return &batch{
		db: db.db,
		b:  new(leveldb.Batch),
	}
}

// NewBatchWithSize creates a write-only database batch with a pre-allocated buffer.
func (db *Database) NewBatchWithSize(size int) ethdb.Batch {
	return &batch{
		db: db.db,
		b:  leveldb.MakeBatch(size),
	}
}

// NewIterator creates a binary-alphabetical iterator over a subset of database
// content with a particular key prefix, starting at a particular initial key
// (or after, if it does not exist).
func (db *Database) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
	return db.db.NewIterator(bytesPrefixRange(prefix, start), nil)
}

// NewSnapshot creates a database snapshot based on the current state.
// The created snapshot will not be affected by any mutation that happens
// on the database afterwards.
// Note, don't forget to release the snapshot once it's no longer needed,
// otherwise the stale data will never be cleaned up by the underlying
// compactor.
func (db *Database) NewSnapshot() (ethdb.Snapshot, error) {
	snap, err := db.db.GetSnapshot()
	if err != nil {
		return nil, err
	}
	return &snapshot{db: snap}, nil
}

// Stat returns a particular internal stat of the database.
func (db *Database) Stat(property string) (string, error) {
	if property == "" {
		property = "leveldb.stats"
	} else if !strings.HasPrefix(property, "leveldb.") {
		property = "leveldb." + property
	}
	return db.db.GetProperty(property)
}

// Compact flattens the underlying data store for the given key range. In essence,
// deleted and overwritten versions are discarded, and the data is rearranged to
// reduce the cost of operations needed to access them.
//
// A nil start is treated as a key before all keys in the data store; a nil limit
// is treated as a key after all keys in the data store. If both are nil, the
// entire data store will be compacted.
func (db *Database) Compact(start []byte, limit []byte) error {
	return db.db.CompactRange(util.Range{Start: start, Limit: limit})
}

// Path returns the path to the database directory.
func (db *Database) Path() string {
	return db.fn
}
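// exampleIterate is an illustrative sketch, not part of the original file: it
// drains a prefix iterator and then compacts the covered range. The prefix is
// a hypothetical value; note that Release must always be called, even on error.
func exampleIterate(db *Database) error {
	it := db.NewIterator([]byte("h"), nil)
	defer it.Release()

	for it.Next() {
		_, _ = it.Key(), it.Value() // consume the current entry
	}
	if err := it.Error(); err != nil {
		return err
	}
	// Compact everything under the scanned prefix once the scan is done
	// ("i" is the first key past all "h"-prefixed keys).
	return db.Compact([]byte("h"), []byte("i"))
}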
// meter periodically retrieves internal leveldb counters and reports them to
// the metrics subsystem.
func (db *Database) meter(refresh time.Duration, namespace string) {
	// Create the counters to store current and previous compaction values
	compactions := make([][]int64, 2)
	for i := 0; i < 2; i++ {
		compactions[i] = make([]int64, 4)
	}
	// Create storage for the states and the warning log tracer.
	var (
		errc chan error
		merr error

		stats           leveldb.DBStats
		iostats         [2]int64
		delaystats      [2]int64
		lastWritePaused time.Time
	)
	timer := time.NewTimer(refresh)
	defer timer.Stop()

	// Iterate ad infinitum and collect the stats
	for i := 1; errc == nil && merr == nil; i++ {
		// Retrieve the database stats. The Stats method resets its internal
		// buffers, so it's okay to just pass in the same struct every time.
		err := db.db.Stats(&stats)
		if err != nil {
			db.log.Error("Failed to read database stats", "err", err)
			merr = err
			continue
		}
		// Iterate over all the leveldbTable rows and accumulate the entries
		for j := 0; j < len(compactions[i%2]); j++ {
			compactions[i%2][j] = 0
		}
		compactions[i%2][0] = stats.LevelSizes.Sum()
		for _, t := range stats.LevelDurations {
			compactions[i%2][1] += t.Nanoseconds()
		}
		compactions[i%2][2] = stats.LevelRead.Sum()
		compactions[i%2][3] = stats.LevelWrite.Sum()
		// Update all the requested meters
		if db.diskSizeGauge != nil {
			db.diskSizeGauge.Update(compactions[i%2][0])
		}
		if db.compTimeMeter != nil {
			db.compTimeMeter.Mark(compactions[i%2][1] - compactions[(i-1)%2][1])
		}
		if db.compReadMeter != nil {
			db.compReadMeter.Mark(compactions[i%2][2] - compactions[(i-1)%2][2])
		}
		if db.compWriteMeter != nil {
			db.compWriteMeter.Mark(compactions[i%2][3] - compactions[(i-1)%2][3])
		}
		var (
			delayN   = int64(stats.WriteDelayCount)
			duration = stats.WriteDelayDuration
			paused   = stats.WritePaused
		)
		if db.writeDelayNMeter != nil {
			db.writeDelayNMeter.Mark(delayN - delaystats[0])
		}
		if db.writeDelayMeter != nil {
			db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
		}
		// If a warning that the db is performing compaction has been displayed,
		// any subsequent warnings will be withheld for one minute so as not to
		// overwhelm the user.
		if paused && delayN-delaystats[0] == 0 && duration.Nanoseconds()-delaystats[1] == 0 &&
			time.Now().After(lastWritePaused.Add(degradationWarnInterval)) {
			db.log.Warn("Database compacting, degraded performance")
			lastWritePaused = time.Now()
		}
		delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()

		var (
			nRead  = int64(stats.IORead)
			nWrite = int64(stats.IOWrite)
		)
		if db.diskReadMeter != nil {
			db.diskReadMeter.Mark(nRead - iostats[0])
		}
		if db.diskWriteMeter != nil {
			db.diskWriteMeter.Mark(nWrite - iostats[1])
		}
		iostats[0], iostats[1] = nRead, nWrite

		db.memCompGauge.Update(int64(stats.MemComp))
		db.level0CompGauge.Update(int64(stats.Level0Comp))
		db.nonlevel0CompGauge.Update(int64(stats.NonLevel0Comp))
		db.seekCompGauge.Update(int64(stats.SeekComp))

		for i, tables := range stats.LevelTablesCounts {
			// Register gauges for any additional levels on the fly
			if i >= len(db.levelsGauge) {
				db.levelsGauge = append(db.levelsGauge, metrics.NewRegisteredGauge(namespace+fmt.Sprintf("tables/level%v", i), nil))
			}
			db.levelsGauge[i].Update(int64(tables))
		}

		// Sleep a bit, then repeat the stats collection
		select {
		case errc = <-db.quitChan:
			// Quit requested, stop hammering the database
		case <-timer.C:
			timer.Reset(refresh)
			// Timeout, gather a new set of stats
		}
	}

	if errc == nil {
		errc = <-db.quitChan
	}
	errc <- merr
}
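// exampleBatch is an illustrative sketch, not part of the original file: it
// stages a few mutations in a batch and commits them atomically with Write.
// The keys and values are hypothetical.
func exampleBatch(db *Database) error {
	b := db.NewBatch()
	if err := b.Put([]byte("k1"), []byte("v1")); err != nil {
		return err
	}
	if err := b.Delete([]byte("k2")); err != nil {
		return err
	}
	// ValueSize reports the data queued so far; Write flushes it to disk.
	if b.ValueSize() > 0 {
		return b.Write()
	}
	return nil
}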
// batch is a write-only leveldb batch that commits changes to its host database
// when Write is called. A batch cannot be used concurrently.
type batch struct {
	db   *leveldb.DB
	b    *leveldb.Batch
	size int
}

// Put inserts the given value into the batch for later committing.
func (b *batch) Put(key, value []byte) error {
	b.b.Put(key, value)
	b.size += len(key) + len(value)
	return nil
}

// Delete inserts a key removal into the batch for later committing.
func (b *batch) Delete(key []byte) error {
	b.b.Delete(key)
	b.size += len(key)
	return nil
}

// ValueSize retrieves the amount of data queued up for writing.
func (b *batch) ValueSize() int {
	return b.size
}

// Write flushes any accumulated data to disk.
func (b *batch) Write() error {
	return b.db.Write(b.b, nil)
}

// Reset resets the batch for reuse.
func (b *batch) Reset() {
	b.b.Reset()
	b.size = 0
}

// Replay replays the batch contents.
func (b *batch) Replay(w ethdb.KeyValueWriter) error {
	return b.b.Replay(&replayer{writer: w})
}

// replayer is a small wrapper to implement the correct replay methods.
type replayer struct {
	writer  ethdb.KeyValueWriter
	failure error
}

// Put inserts the given value into the key-value data store.
func (r *replayer) Put(key, value []byte) {
	// If the replay already failed, stop executing ops
	if r.failure != nil {
		return
	}
	r.failure = r.writer.Put(key, value)
}

// Delete removes the key from the key-value data store.
func (r *replayer) Delete(key []byte) {
	// If the replay already failed, stop executing ops
	if r.failure != nil {
		return
	}
	r.failure = r.writer.Delete(key)
}

// bytesPrefixRange returns a key range that satisfies
//   - the given prefix, and
//   - the given seek position
func bytesPrefixRange(prefix, start []byte) *util.Range {
	r := util.BytesPrefix(prefix)
	r.Start = append(r.Start, start...)
	return r
}

// snapshot wraps a leveldb snapshot for implementing the Snapshot interface.
type snapshot struct {
	db *leveldb.Snapshot
}

// Has reports whether a key is present in the snapshot backed by a key-value
// data store.
func (snap *snapshot) Has(key []byte) (bool, error) {
	return snap.db.Has(key, nil)
}

// Get retrieves the given key if it's present in the snapshot backed by a
// key-value data store.
func (snap *snapshot) Get(key []byte) ([]byte, error) {
	return snap.db.Get(key, nil)
}

// Release releases associated resources. Release should always succeed and can
// be called multiple times without causing errors.
func (snap *snapshot) Release() {
	snap.db.Release()
}
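// exampleSnapshot is an illustrative sketch, not part of the original file: it
// takes a point-in-time snapshot, reads through it, and releases it so the
// underlying compactor can reclaim stale data. The key is hypothetical.
func exampleSnapshot(db *Database) ([]byte, error) {
	snap, err := db.NewSnapshot()
	if err != nil {
		return nil, err
	}
	defer snap.Release()
	return snap.Get([]byte("key"))
}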