// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snapshot

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"time"

	"github.com/VictoriaMetrics/fastcache"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/trie"
)

// journalVersion is the only accepted version of the new-format diff journal.
// Journals carrying any other version are discarded on load and the diffs are
// recovered later (see loadAndParseJournal).
const journalVersion uint64 = 0

// journalGenerator is a disk layer entry containing the generator progress marker.
type journalGenerator struct {
	Wiping   bool   // Whether the database was in progress of being wiped
	Done     bool   // Whether the generator finished creating the snapshot
	Marker   []byte // Progress marker to resume generation from; nil means not started (presumably the last generated key — verify against the generator)
	Accounts uint64 // Number of accounts generated so far (resumes generatorStats.accounts)
	Slots    uint64 // Number of storage slots generated so far (resumes generatorStats.slots)
	Storage  uint64 // Generated data size so far, stored as common.StorageSize in generatorStats
}

// journalDestruct is an account deletion entry in a diffLayer's disk journal.
type journalDestruct struct {
	Hash common.Hash // Hash of the destructed account
}

// journalAccount is an account entry in a diffLayer's disk journal.
type journalAccount struct {
	Hash common.Hash // Hash of the account
	Blob []byte      // Account data; an empty slice is reinterpreted as nil on load (RLP cannot journal nil-ness)
}

// journalStorage is an account's storage map in a diffLayer's disk journal.
type journalStorage struct {
	Hash common.Hash   // Hash of the account owning the storage
	Keys []common.Hash // Storage slot keys
	Vals [][]byte      // Storage slot values, parallel to Keys; empty slices are reinterpreted as nil on load
}

// loadAndParseLegacyJournal tries to parse the snapshot journal in legacy format.
func loadAndParseLegacyJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) {
	// Retrieve the journal, for legacy journal it must exist since even for
	// 0 layer it stores whether we've already generated the snapshot or are
	// in progress only.
	journal := rawdb.ReadSnapshotJournal(db)
	if len(journal) == 0 {
		return nil, journalGenerator{}, errors.New("missing or corrupted snapshot journal")
	}
	r := rlp.NewStream(bytes.NewReader(journal), 0)

	// Read the snapshot generation progress for the disk layer
	var generator journalGenerator
	if err := r.Decode(&generator); err != nil {
		return nil, journalGenerator{}, fmt.Errorf("failed to load snapshot progress marker: %v", err)
	}
	// Load all the snapshot diffs from the journal
	snapshot, err := loadDiffLayer(base, r)
	if err != nil {
		return nil, generator, err
	}
	return snapshot, generator, nil
}

// loadAndParseJournal tries to parse the snapshot journal in latest format.
func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) {
	// Retrieve the disk layer generator. It must exist, no matter the
	// snapshot is fully generated or not. Otherwise the entire disk
	// layer is invalid.
	generatorBlob := rawdb.ReadSnapshotGenerator(db)
	if len(generatorBlob) == 0 {
		return nil, journalGenerator{}, errors.New("missing snapshot generator")
	}
	var generator journalGenerator
	if err := rlp.DecodeBytes(generatorBlob, &generator); err != nil {
		return nil, journalGenerator{}, fmt.Errorf("failed to decode snapshot generator: %v", err)
	}
	// Retrieve the diff layer journal. It's possible that the journal is
	// not existent, e.g. the disk layer is generating while Geth crashes
	// without persisting the diff journal.
	// So if there is no journal, or the journal is invalid (e.g. the journal
	// is not matched with the disk layer, or it's the legacy-format journal,
	// etc.), we just discard all diffs and try to recover them later.
	journal := rawdb.ReadSnapshotJournal(db)
	if len(journal) == 0 {
		log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing")
		return base, generator, nil
	}
	r := rlp.NewStream(bytes.NewReader(journal), 0)

	// Firstly, resolve the first element as the journal version
	version, err := r.Uint()
	if err != nil {
		log.Warn("Failed to resolve the journal version", "error", err)
		return base, generator, nil
	}
	if version != journalVersion {
		log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version)
		return base, generator, nil
	}
	// Secondly, resolve the disk layer root, ensure it's continuous
	// with disk layer. Note now we can ensure it's the snapshot journal
	// correct version, so we expect everything can be resolved properly.
	var root common.Hash
	if err := r.Decode(&root); err != nil {
		return nil, journalGenerator{}, errors.New("missing disk layer root")
	}
	// The diff journal is not matched with disk, discard them.
	// It can happen that Geth crashes without persisting the latest
	// diff journal.
	if !bytes.Equal(root.Bytes(), base.root.Bytes()) {
		log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "unmatched")
		return base, generator, nil
	}
	// Load all the snapshot diffs from the journal
	snapshot, err := loadDiffLayer(base, r)
	if err != nil {
		return nil, journalGenerator{}, err
	}
	log.Debug("Loaded snapshot journal", "diskroot", base.root, "diffhead", snapshot.Root())
	return snapshot, generator, nil
}

// loadSnapshot loads a pre-existing state snapshot backed by a key-value store.
func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, error) {
	// Retrieve the block number and hash of the snapshot, failing if no snapshot
	// is present in the database (or crashed mid-update).
	baseRoot := rawdb.ReadSnapshotRoot(diskdb)
	if baseRoot == (common.Hash{}) {
		return nil, errors.New("missing or corrupted snapshot")
	}
	base := &diskLayer{
		diskdb: diskdb,
		triedb: triedb,
		cache:  fastcache.New(cache * 1024 * 1024), // cache is expressed in megabytes
		root:   baseRoot,
	}
	// Try the new journal format first, falling back to the legacy format
	// only if the new one cannot be loaded.
	var legacy bool
	snapshot, generator, err := loadAndParseJournal(diskdb, base)
	if err != nil {
		log.Warn("Failed to load new-format journal", "error", err)
		snapshot, generator, err = loadAndParseLegacyJournal(diskdb, base)
		legacy = true
	}
	if err != nil {
		return nil, err
	}
	// Entire snapshot journal loaded, sanity check the head. If the loaded
	// snapshot is not matched with current state root, print a warning log
	// or discard the entire snapshot if it's a legacy snapshot.
	//
	// Possible scenario: Geth was crashed without persisting journal and then
	// restart, the head is rewound to the point with available state(trie)
	// which is below the snapshot. In this case the snapshot can be recovered
	// by re-executing blocks but right now it's unavailable.
	if head := snapshot.Root(); head != root {
		// If it's legacy snapshot, or it's new-format snapshot but
		// it's not in recovery mode, returns the error here for
		// rebuilding the entire snapshot forcibly.
		if legacy || !recovery {
			return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
		}
		// It's in snapshot recovery, the assumption is held that
		// the disk layer is always higher than chain head. It can
		// be eventually recovered when the chain head moves beyond
		// the disk layer.
		log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root)
	}
	// Everything loaded correctly, resume any suspended operations
	if !generator.Done {
		// If the generator was still wiping, restart one from scratch (fine for
		// now as it's rare and the wiper deletes the stuff it touches anyway, so
		// restarting won't incur a lot of extra database hops).
		var wiper chan struct{}
		if generator.Wiping {
			log.Info("Resuming previous snapshot wipe")
			wiper = wipeSnapshot(diskdb, false)
		}
		// Whether or not wiping was in progress, load any generator progress too
		base.genMarker = generator.Marker
		if base.genMarker == nil {
			base.genMarker = []byte{}
		}
		base.genPending = make(chan struct{})
		base.genAbort = make(chan chan *generatorStats)

		// The first 8 bytes of the marker, when present, encode the account
		// origin the generator should resume from.
		var origin uint64
		if len(generator.Marker) >= 8 {
			origin = binary.BigEndian.Uint64(generator.Marker)
		}
		go base.generate(&generatorStats{
			wiping:   wiper,
			origin:   origin,
			start:    time.Now(),
			accounts: generator.Accounts,
			slots:    generator.Slots,
			storage:  common.StorageSize(generator.Storage),
		})
	}
	return snapshot, nil
}

// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
// diff and verifying that it can be linked to the requested parent.
230 func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { 231 // Read the next diff journal entry 232 var root common.Hash 233 if err := r.Decode(&root); err != nil { 234 // The first read may fail with EOF, marking the end of the journal 235 if err == io.EOF { 236 return parent, nil 237 } 238 return nil, fmt.Errorf("load diff root: %v", err) 239 } 240 var destructs []journalDestruct 241 if err := r.Decode(&destructs); err != nil { 242 return nil, fmt.Errorf("load diff destructs: %v", err) 243 } 244 destructSet := make(map[common.Hash]struct{}) 245 for _, entry := range destructs { 246 destructSet[entry.Hash] = struct{}{} 247 } 248 var accounts []journalAccount 249 if err := r.Decode(&accounts); err != nil { 250 return nil, fmt.Errorf("load diff accounts: %v", err) 251 } 252 accountData := make(map[common.Hash][]byte) 253 for _, entry := range accounts { 254 if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that 255 accountData[entry.Hash] = entry.Blob 256 } else { 257 accountData[entry.Hash] = nil 258 } 259 } 260 var storage []journalStorage 261 if err := r.Decode(&storage); err != nil { 262 return nil, fmt.Errorf("load diff storage: %v", err) 263 } 264 storageData := make(map[common.Hash]map[common.Hash][]byte) 265 for _, entry := range storage { 266 slots := make(map[common.Hash][]byte) 267 for i, key := range entry.Keys { 268 if len(entry.Vals[i]) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that 269 slots[key] = entry.Vals[i] 270 } else { 271 slots[key] = nil 272 } 273 } 274 storageData[entry.Hash] = slots 275 } 276 return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) 277 } 278 279 // Journal terminates any in-progress snapshot generation, also implicitly pushing 280 // the progress into the database. 
// Journal terminates any in-progress snapshot generation, also implicitly pushing
// the progress into the database. The disk layer itself is never serialized into
// the buffer — only its generator progress is persisted.
func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
	// If the snapshot is currently being generated, abort it.
	// Note: the abort handshake happens before taking the read lock — the
	// generator goroutine needs the lock to wind down.
	var stats *generatorStats
	if dl.genAbort != nil {
		abort := make(chan *generatorStats)
		dl.genAbort <- abort

		if stats = <-abort; stats != nil {
			stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker)
		}
	}
	// Ensure the layer didn't get stale
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return common.Hash{}, ErrSnapshotStale
	}
	// Ensure the generator stats are written even if none were run this cycle
	journalProgress(dl.diskdb, dl.genMarker, stats)

	log.Debug("Journalled disk layer", "root", dl.root)
	return dl.root, nil
}

// Journal writes the memory layer contents into a buffer to be stored in the
// database as the snapshot journal.
func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
	// Journal the parent first: layers are serialized bottom-up so that
	// loadDiffLayer can replay them in order on top of the disk layer.
	base, err := dl.parent.Journal(buffer)
	if err != nil {
		return common.Hash{}, err
	}
	// Ensure the layer didn't get stale
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.Stale() {
		return common.Hash{}, ErrSnapshotStale
	}
	// Everything below was journalled, persist this layer too
	if err := rlp.Encode(buffer, dl.root); err != nil {
		return common.Hash{}, err
	}
	destructs := make([]journalDestruct, 0, len(dl.destructSet))
	for hash := range dl.destructSet {
		destructs = append(destructs, journalDestruct{Hash: hash})
	}
	if err := rlp.Encode(buffer, destructs); err != nil {
		return common.Hash{}, err
	}
	accounts := make([]journalAccount, 0, len(dl.accountData))
	for hash, blob := range dl.accountData {
		accounts = append(accounts, journalAccount{Hash: hash, Blob: blob})
	}
	if err := rlp.Encode(buffer, accounts); err != nil {
		return common.Hash{}, err
	}
	// Flatten each account's storage map into parallel key/value slices
	// (journalStorage), since RLP cannot encode maps directly.
	storage := make([]journalStorage, 0, len(dl.storageData))
	for hash, slots := range dl.storageData {
		keys := make([]common.Hash, 0, len(slots))
		vals := make([][]byte, 0, len(slots))
		for key, val := range slots {
			keys = append(keys, key)
			vals = append(vals, val)
		}
		storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals})
	}
	if err := rlp.Encode(buffer, storage); err != nil {
		return common.Hash{}, err
	}
	log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root())
	return base, nil
}

// LegacyJournal writes the persistent layer generator stats into a buffer
// to be stored in the database as the snapshot journal.
//
// Note it's the legacy version which is only used in testing right now.
func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) {
	// If the snapshot is currently being generated, abort it
	var stats *generatorStats
	if dl.genAbort != nil {
		abort := make(chan *generatorStats)
		dl.genAbort <- abort

		if stats = <-abort; stats != nil {
			stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker)
		}
	}
	// Ensure the layer didn't get stale
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return common.Hash{}, ErrSnapshotStale
	}
	// Write out the generator marker. Unlike the new format, the legacy
	// journal embeds the generator progress in the journal blob itself.
	entry := journalGenerator{
		Done:   dl.genMarker == nil,
		Marker: dl.genMarker,
	}
	if stats != nil {
		entry.Wiping = (stats.wiping != nil)
		entry.Accounts = stats.accounts
		entry.Slots = stats.slots
		entry.Storage = uint64(stats.storage)
	}
	log.Debug("Legacy journalled disk layer", "root", dl.root)
	if err := rlp.Encode(buffer, entry); err != nil {
		return common.Hash{}, err
	}
	return dl.root, nil
}

// LegacyJournal writes the memory layer contents into a buffer to be stored in the
// database as the snapshot journal.
//
// Note it's the legacy version which is only used in testing right now.
func (dl *diffLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) {
	// Journal the parent first
	base, err := dl.parent.LegacyJournal(buffer)
	if err != nil {
		return common.Hash{}, err
	}
	// Ensure the layer didn't get stale
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.Stale() {
		return common.Hash{}, ErrSnapshotStale
	}
	// Everything below was journalled, persist this layer too
	if err := rlp.Encode(buffer, dl.root); err != nil {
		return common.Hash{}, err
	}
	destructs := make([]journalDestruct, 0, len(dl.destructSet))
	for hash := range dl.destructSet {
		destructs = append(destructs, journalDestruct{Hash: hash})
	}
	if err := rlp.Encode(buffer, destructs); err != nil {
		return common.Hash{}, err
	}
	accounts := make([]journalAccount, 0, len(dl.accountData))
	for hash, blob := range dl.accountData {
		accounts = append(accounts, journalAccount{Hash: hash, Blob: blob})
	}
	if err := rlp.Encode(buffer, accounts); err != nil {
		return common.Hash{}, err
	}
	// Flatten each account's storage map into parallel key/value slices
	// (journalStorage), since RLP cannot encode maps directly.
	storage := make([]journalStorage, 0, len(dl.storageData))
	for hash, slots := range dl.storageData {
		keys := make([]common.Hash, 0, len(slots))
		vals := make([][]byte, 0, len(slots))
		for key, val := range slots {
			keys = append(keys, key)
			vals = append(vals, val)
		}
		storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals})
	}
	if err := rlp.Encode(buffer, storage); err != nil {
		return common.Hash{}, err
	}
	log.Debug("Legacy journalled diff layer", "root", dl.root, "parent", dl.parent.Root())
	return base, nil
}