github.com/MetalBlockchain/metalgo@v1.11.9/database/leveldb/db.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package leveldb

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"math"
	"slices"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/iterator"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/util"
	"go.uber.org/zap"

	"github.com/MetalBlockchain/metalgo/database"
	"github.com/MetalBlockchain/metalgo/utils"
	"github.com/MetalBlockchain/metalgo/utils/logging"
)

const (
	// Name is the name of this database for database switches
	Name = "leveldb"

	// DefaultBlockCacheSize is the number of bytes to use for block caching in
	// leveldb.
	DefaultBlockCacheSize = 12 * opt.MiB

	// DefaultWriteBufferSize is the number of bytes to use for buffers in
	// leveldb.
	DefaultWriteBufferSize = 12 * opt.MiB

	// DefaultHandleCap is the number of file descriptors to cap levelDB to
	// use.
	DefaultHandleCap = 1024

	// DefaultBitsPerKey is the number of bits to add to the bloom filter per
	// key.
	DefaultBitsPerKey = 10

	// DefaultMaxManifestFileSize is the default maximum size of a manifest
	// file.
	//
	// This avoids https://github.com/syndtr/goleveldb/issues/413.
	DefaultMaxManifestFileSize = math.MaxInt64

	// DefaultMetricUpdateFrequency is the frequency to poll the LevelDB
	// metrics.
	DefaultMetricUpdateFrequency = 10 * time.Second

	// levelDBByteOverhead is the number of bytes of constant overhead that
	// should be added to a batch size per operation.
	levelDBByteOverhead = 8
)

var (
	_ database.Database = (*Database)(nil)
	_ database.Batch    = (*batch)(nil)
	_ database.Iterator = (*iter)(nil)

	ErrInvalidConfig = errors.New("invalid config")
	ErrCouldNotOpen  = errors.New("could not open")
)

// Database is a persistent key-value store. Apart from basic data storage
// functionality it also supports batch writes and iterating over the keyspace
// in binary-alphabetical order.
type Database struct {
	*leveldb.DB
	// metrics is only initialized and used when [MetricUpdateFrequency] is > 0
	// in the config
	metrics   metrics
	closed    utils.Atomic[bool]
	closeOnce sync.Once
	// closeCh is closed when Close() is called.
	closeCh chan struct{}
	// closeWg is used to wait for all goroutines created by New() to exit.
	// This avoids racy behavior when Close() is called at the same time as
	// Stats(). See: https://github.com/syndtr/goleveldb/issues/418
	closeWg sync.WaitGroup
}

type config struct {
	// BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
	// Use -1 for zero; this has the same effect as specifying NoCacher to BlockCacher.
	//
	// The default value is 12MiB.
	BlockCacheCapacity int `json:"blockCacheCapacity"`
	// BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
	// block.
	//
	// The default value is 4KiB.
	BlockSize int `json:"blockSize"`
	// CompactionExpandLimitFactor limits compaction size after expansion.
	// This will be multiplied by the table size limit at the compaction target
	// level.
	//
	// The default value is 25.
	CompactionExpandLimitFactor int `json:"compactionExpandLimitFactor"`
	// CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2)
	// that a single 'sorted table' generates. This will be multiplied by the
	// table size limit at the grandparent level.
	//
	// The default value is 10.
	CompactionGPOverlapsFactor int `json:"compactionGPOverlapsFactor"`
	// CompactionL0Trigger defines the number of 'sorted tables' at level-0 that
	// will trigger compaction.
	//
	// The default value is 4.
	CompactionL0Trigger int `json:"compactionL0Trigger"`
	// CompactionSourceLimitFactor limits compaction source size. This doesn't
	// apply to level-0.
	// This will be multiplied by the table size limit at the compaction target
	// level.
	//
	// The default value is 1.
	CompactionSourceLimitFactor int `json:"compactionSourceLimitFactor"`
	// CompactionTableSize limits the size of 'sorted tables' that compaction
	// generates. The limit for each level is calculated as:
	//	CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
	// The multiplier for each level can also be fine-tuned using
	// CompactionTableSizeMultiplierPerLevel.
	//
	// The default value is 2MiB.
	CompactionTableSize int `json:"compactionTableSize"`
	// CompactionTableSizeMultiplier defines the multiplier for CompactionTableSize.
	//
	// The default value is 1.
	CompactionTableSizeMultiplier float64 `json:"compactionTableSizeMultiplier"`
	// CompactionTableSizeMultiplierPerLevel defines a per-level multiplier for
	// CompactionTableSize.
	// Use zero to skip a level.
	//
	// The default value is nil.
	CompactionTableSizeMultiplierPerLevel []float64 `json:"compactionTableSizeMultiplierPerLevel"`
	// CompactionTotalSize limits the total size of 'sorted tables' for each level.
	// The limit for each level is calculated as:
	//	CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
	// The multiplier for each level can also be fine-tuned using
	// CompactionTotalSizeMultiplierPerLevel.
	//
	// The default value is 10MiB.
	CompactionTotalSize int `json:"compactionTotalSize"`
	// CompactionTotalSizeMultiplier defines the multiplier for CompactionTotalSize.
	//
	// The default value is 10.
	CompactionTotalSizeMultiplier float64 `json:"compactionTotalSizeMultiplier"`
	// DisableSeeksCompaction allows disabling 'seeks triggered compaction'.
	// The purpose of 'seeks triggered compaction' is to optimize the database
	// so that 'level seeks' can be minimized; however, this might generate many
	// small compactions, which may not be preferable.
	//
	// The default is true.
	DisableSeeksCompaction bool `json:"disableSeeksCompaction"`
	// OpenFilesCacheCapacity defines the capacity of the open files caching.
	// Use -1 for zero; this has the same effect as specifying NoCacher to
	// OpenFilesCacher.
	//
	// The default value is 1024.
	OpenFilesCacheCapacity int `json:"openFilesCacheCapacity"`
	// WriteBuffer defines the maximum size of a 'memdb' before it is flushed to
	// a 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk
	// unsorted journal.
	//
	// LevelDB may hold up to two 'memdb's at the same time.
	//
	// The default value is 6MiB.
	WriteBuffer      int `json:"writeBuffer"`
	FilterBitsPerKey int `json:"filterBitsPerKey"`

	// MaxManifestFileSize is the maximum size limit of the MANIFEST-****** file.
	// When the MANIFEST-****** file grows beyond this size, LevelDB will create
	// a new MANIFEST file.
	//
	// The default value is infinity.
	MaxManifestFileSize int64 `json:"maxManifestFileSize"`

	// MetricUpdateFrequency is the frequency to poll LevelDB metrics.
	// If <= 0, LevelDB metrics aren't polled.
	MetricUpdateFrequency time.Duration `json:"metricUpdateFrequency"`
}
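
// A hypothetical JSON config (the bytes passed to New as configBytes) might
// look like the sketch below; none of these values come from the original
// file. Field names match the struct tags above. Note that
// metricUpdateFrequency is a time.Duration, so it decodes from a JSON number
// of nanoseconds (10000000000 is 10s):
//
//	{
//		"blockCacheCapacity": 25165824,
//		"writeBuffer": 12582912,
//		"filterBitsPerKey": 10,
//		"metricUpdateFrequency": 10000000000
//	}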

// New returns a wrapped LevelDB object.
func New(file string, configBytes []byte, log logging.Logger, reg prometheus.Registerer) (database.Database, error) {
	parsedConfig := config{
		BlockCacheCapacity:     DefaultBlockCacheSize,
		DisableSeeksCompaction: true,
		OpenFilesCacheCapacity: DefaultHandleCap,
		WriteBuffer:            DefaultWriteBufferSize / 2,
		FilterBitsPerKey:       DefaultBitsPerKey,
		MaxManifestFileSize:    DefaultMaxManifestFileSize,
		MetricUpdateFrequency:  DefaultMetricUpdateFrequency,
	}
	if len(configBytes) > 0 {
		if err := json.Unmarshal(configBytes, &parsedConfig); err != nil {
			return nil, fmt.Errorf("%w: %w", ErrInvalidConfig, err)
		}
	}

	log.Info("creating leveldb",
		zap.Reflect("config", parsedConfig),
	)

	// Open the db and recover from any potential corruption
	db, err := leveldb.OpenFile(file, &opt.Options{
		BlockCacheCapacity:            parsedConfig.BlockCacheCapacity,
		BlockSize:                     parsedConfig.BlockSize,
		CompactionExpandLimitFactor:   parsedConfig.CompactionExpandLimitFactor,
		CompactionGPOverlapsFactor:    parsedConfig.CompactionGPOverlapsFactor,
		CompactionL0Trigger:           parsedConfig.CompactionL0Trigger,
		CompactionSourceLimitFactor:   parsedConfig.CompactionSourceLimitFactor,
		CompactionTableSize:           parsedConfig.CompactionTableSize,
		CompactionTableSizeMultiplier: parsedConfig.CompactionTableSizeMultiplier,
		CompactionTotalSize:           parsedConfig.CompactionTotalSize,
		CompactionTotalSizeMultiplier: parsedConfig.CompactionTotalSizeMultiplier,
		DisableSeeksCompaction:        parsedConfig.DisableSeeksCompaction,
		OpenFilesCacheCapacity:        parsedConfig.OpenFilesCacheCapacity,
		WriteBuffer:                   parsedConfig.WriteBuffer,
		Filter:                        filter.NewBloomFilter(parsedConfig.FilterBitsPerKey),
		MaxManifestFileSize:           parsedConfig.MaxManifestFileSize,
	})
	if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
		db, err = leveldb.RecoverFile(file, nil)
	}
	if err != nil {
		return nil, fmt.Errorf("%w: %w", ErrCouldNotOpen, err)
	}

	wrappedDB := &Database{
		DB:      db,
		closeCh: make(chan struct{}),
	}
	if parsedConfig.MetricUpdateFrequency > 0 {
		metrics, err := newMetrics(reg)
		if err != nil {
			// Drop any close error to report the original error
			_ = db.Close()
			return nil, err
		}
		wrappedDB.metrics = metrics
		wrappedDB.closeWg.Add(1)
		go func() {
			t := time.NewTicker(parsedConfig.MetricUpdateFrequency)
			defer func() {
				t.Stop()
				wrappedDB.closeWg.Done()
			}()

			for {
				if err := wrappedDB.updateMetrics(); err != nil {
					log.Warn("failed to update leveldb metrics",
						zap.Error(err),
					)
				}

				select {
				case <-t.C:
				case <-wrappedDB.closeCh:
					return
				}
			}
		}()
	}
	return wrappedDB, nil
}
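
// A minimal usage sketch (not part of the original file): opening a database
// at an assumed path with the default config and a fresh metrics registry.
// logging.NoLog{} is assumed to be available in this fork, as in upstream
// avalanchego; any logging.Logger works. The wrapped sentinel errors
// (ErrInvalidConfig, ErrCouldNotOpen) can be matched with the standard
// library's errors.Is:
//
//	db, err := New("/tmp/example-leveldb", nil, logging.NoLog{}, prometheus.NewRegistry())
//	if err != nil {
//		return err
//	}
//	defer db.Close()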

// Has returns whether the key is set in the database
func (db *Database) Has(key []byte) (bool, error) {
	has, err := db.DB.Has(key, nil)
	return has, updateError(err)
}

// Get returns the value the key maps to in the database
func (db *Database) Get(key []byte) ([]byte, error) {
	value, err := db.DB.Get(key, nil)
	return value, updateError(err)
}

// Put sets the value of the provided key to the provided value
func (db *Database) Put(key []byte, value []byte) error {
	return updateError(db.DB.Put(key, value, nil))
}

// Delete removes the key from the database
func (db *Database) Delete(key []byte) error {
	return updateError(db.DB.Delete(key, nil))
}

// NewBatch creates a write/delete-only buffer that is atomically committed to
// the database when Write is called
func (db *Database) NewBatch() database.Batch {
	return &batch{db: db}
}

// NewIterator creates a lexicographically ordered iterator over the database
func (db *Database) NewIterator() database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(new(util.Range), nil),
	}
}

// NewIteratorWithStart creates a lexicographically ordered iterator over the
// database starting at the provided key
func (db *Database) NewIteratorWithStart(start []byte) database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(&util.Range{Start: start}, nil),
	}
}

// NewIteratorWithPrefix creates a lexicographically ordered iterator over the
// database ignoring keys that do not start with the provided prefix
func (db *Database) NewIteratorWithPrefix(prefix []byte) database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(util.BytesPrefix(prefix), nil),
	}
}

// NewIteratorWithStartAndPrefix creates a lexicographically ordered iterator
// over the database starting at start and ignoring keys that do not start with
// the provided prefix
func (db *Database) NewIteratorWithStartAndPrefix(start, prefix []byte) database.Iterator {
	iterRange := util.BytesPrefix(prefix)
	if bytes.Compare(start, prefix) > 0 {
		iterRange.Start = start
	}
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(iterRange, nil),
	}
}
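
// A minimal iteration sketch (not part of the original file). Because Next
// clones the underlying key and value (see iter.Next below), the returned
// slices remain valid after subsequent Next calls. process is a hypothetical
// callback:
//
//	it := db.NewIteratorWithPrefix([]byte("0x"))
//	defer it.Release()
//	for it.Next() {
//		process(it.Key(), it.Value())
//	}
//	if err := it.Error(); err != nil {
//		return err // e.g. database.ErrClosed if the DB was closed mid-iteration
//	}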

// This comment is mostly copy-pasted from the underlying levelDB library:
//
// Compact the underlying DB for the given key range.
// Specifically, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// A nil start is treated as a key before all keys in the DB.
// A nil limit is treated as a key after all keys in the DB.
// Therefore, if both are nil, Compact will compact the entire DB.
func (db *Database) Compact(start []byte, limit []byte) error {
	return updateError(db.DB.CompactRange(util.Range{Start: start, Limit: limit}))
}

func (db *Database) Close() error {
	db.closed.Set(true)
	db.closeOnce.Do(func() {
		close(db.closeCh)
	})
	db.closeWg.Wait()
	return updateError(db.DB.Close())
}

func (db *Database) HealthCheck(context.Context) (interface{}, error) {
	if db.closed.Get() {
		return nil, database.ErrClosed
	}
	return nil, nil
}

// batch is a wrapper around a levelDB batch that also tracks its size.
type batch struct {
	leveldb.Batch
	db   *Database
	size int
}

// Put the value into the batch for later writing
func (b *batch) Put(key, value []byte) error {
	b.Batch.Put(key, value)
	b.size += len(key) + len(value) + levelDBByteOverhead
	return nil
}

// Delete the key during writing
func (b *batch) Delete(key []byte) error {
	b.Batch.Delete(key)
	b.size += len(key) + levelDBByteOverhead
	return nil
}

// Size retrieves the amount of data queued up for writing.
func (b *batch) Size() int {
	return b.size
}

// Write flushes any accumulated data to disk.
func (b *batch) Write() error {
	return updateError(b.db.DB.Write(&b.Batch, nil))
}

// Reset resets the batch for reuse.
func (b *batch) Reset() {
	b.Batch.Reset()
	b.size = 0
}

// Replay the batch contents.
func (b *batch) Replay(w database.KeyValueWriterDeleter) error {
	replay := &replayer{writerDeleter: w}
	if err := b.Batch.Replay(replay); err != nil {
		// Never actually returns an error, because Replay just returns nil
		return err
	}
	return replay.err
}

// Inner returns itself
func (b *batch) Inner() database.Batch {
	return b
}

type replayer struct {
	writerDeleter database.KeyValueWriterDeleter
	err           error
}

func (r *replayer) Put(key, value []byte) {
	if r.err != nil {
		return
	}
	r.err = r.writerDeleter.Put(key, value)
}

func (r *replayer) Delete(key []byte) {
	if r.err != nil {
		return
	}
	r.err = r.writerDeleter.Delete(key)
}

type iter struct {
	db *Database
	iterator.Iterator

	key, val []byte
	err      error
}

func (it *iter) Next() bool {
	// Short-circuit and set an error if the underlying database has been closed.
	if it.db.closed.Get() {
		it.key = nil
		it.val = nil
		it.err = database.ErrClosed
		return false
	}

	hasNext := it.Iterator.Next()
	if hasNext {
		it.key = slices.Clone(it.Iterator.Key())
		it.val = slices.Clone(it.Iterator.Value())
	} else {
		it.key = nil
		it.val = nil
	}
	return hasNext
}

func (it *iter) Error() error {
	if it.err != nil {
		return it.err
	}
	return updateError(it.Iterator.Error())
}

func (it *iter) Key() []byte {
	return it.key
}

func (it *iter) Value() []byte {
	return it.val
}

func updateError(err error) error {
	switch err {
	case leveldb.ErrClosed:
		return database.ErrClosed
	case leveldb.ErrNotFound:
		return database.ErrNotFound
	default:
		return err
	}
}
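
// A small sketch (not part of the original file) of what updateError buys
// callers: backend-specific sentinels are normalized to the database package's
// errors, so code written against database.Database never needs to import
// goleveldb. updateError returns the sentinels unwrapped, so direct comparison
// works:
//
//	switch _, err := db.Get([]byte("missing")); err {
//	case nil:
//		// key present
//	case database.ErrNotFound:
//		// key absent; not a failure
//	case database.ErrClosed:
//		// the database was closed
//	default:
//		// an unexpected I/O or corruption error
//	}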