// github.com/ethereum/go-ethereum@v1.14.4-0.20240516095835-473ee8fc07a3/eth/protocols/snap/gentrie.go

// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snap

import (
	"bytes"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/trie"
)

// genTrie interface is used by the snap syncer to generate merkle tree nodes
// based on a received batch of states.
//
// Two implementations live in this file: pathTrie (path mode) and hashTrie
// (hash mode).
type genTrie interface {
	// update inserts the state item (key, value) into the generator trie and
	// reports any error raised by the underlying trie.
	update(key, value []byte) error

	// commit flushes the right boundary nodes if the complete flag is true.
	// This function must be called before flushing the associated database
	// batch.
	//
	// The implementations in this file return the zero hash whenever complete
	// is false, as the hash of a partially built trie is meaningless.
	commit(complete bool) common.Hash
}

// pathTrie is a wrapper over the stackTrie, incorporating numerous additional
// logics to handle the semi-completed trie and potential leftover dangling
// nodes in the database. It is utilized for constructing the merkle tree nodes
// in path mode during the snap sync process.
43 type pathTrie struct { 44 owner common.Hash // identifier of trie owner, empty for account trie 45 tr *trie.StackTrie // underlying raw stack trie 46 first []byte // the path of first committed node by stackTrie 47 last []byte // the path of last committed node by stackTrie 48 49 // This flag indicates whether nodes on the left boundary are skipped for 50 // committing. If set, the left boundary nodes are considered incomplete 51 // due to potentially missing left children. 52 skipLeftBoundary bool 53 db ethdb.KeyValueReader 54 batch ethdb.Batch 55 } 56 57 // newPathTrie initializes the path trie. 58 func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { 59 tr := &pathTrie{ 60 owner: owner, 61 skipLeftBoundary: skipLeftBoundary, 62 db: db, 63 batch: batch, 64 } 65 tr.tr = trie.NewStackTrie(tr.onTrieNode) 66 return tr 67 } 68 69 // onTrieNode is invoked whenever a new node is committed by the stackTrie. 70 // 71 // As the committed nodes might be incomplete if they are on the boundaries 72 // (left or right), this function has the ability to detect the incomplete 73 // ones and filter them out for committing. 74 // 75 // Additionally, the assumption is made that there may exist leftover dangling 76 // nodes in the database. This function has the ability to detect the dangling 77 // nodes that fall within the path space of committed nodes (specifically on 78 // the path covered by internal extension nodes) and remove them from the 79 // database. This property ensures that the entire path space is uniquely 80 // occupied by committed nodes. 81 // 82 // Furthermore, all leftover dangling nodes along the path from committed nodes 83 // to the trie root (left and right boundaries) should be removed as well; 84 // otherwise, they might potentially disrupt the state healing process. 
85 func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { 86 // Filter out the nodes on the left boundary if skipLeftBoundary is 87 // configured. Nodes are considered to be on the left boundary if 88 // it's the first one to be committed, or the parent/ancestor of the 89 // first committed node. 90 if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { 91 if t.first == nil { 92 // Memorize the path of first committed node, which is regarded 93 // as left boundary. Deep-copy is necessary as the path given 94 // is volatile. 95 t.first = append([]byte{}, path...) 96 97 // The left boundary can be uniquely determined by the first committed node 98 // from stackTrie (e.g., N_1), as the shared path prefix between the first 99 // two inserted state items is deterministic (the path of N_3). The path 100 // from trie root towards the first committed node is considered the left 101 // boundary. The potential leftover dangling nodes on left boundary should 102 // be cleaned out. 103 // 104 // +-----+ 105 // | N_3 | shared path prefix of state_1 and state_2 106 // +-----+ 107 // /- -\ 108 // +-----+ +-----+ 109 // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) 110 // +-----+ +-----+ 111 // 112 // The node with the path of the first committed one (e.g, N_1) is not 113 // removed because it's a sibling of the nodes we want to commit, not 114 // the parent or ancestor. 115 for i := 0; i < len(path); i++ { 116 t.delete(path[:i], false) 117 } 118 } 119 return 120 } 121 // If boundary filtering is not configured, or the node is not on the left 122 // boundary, commit it to database. 123 // 124 // Note: If the current committed node is an extension node, then the nodes 125 // falling within the path between itself and its standalone (not embedded 126 // in parent) child should be cleaned out for exclusively occupy the inner 127 // path. 
128 // 129 // This is essential in snap sync to avoid leaving dangling nodes within 130 // this range covered by extension node which could potentially break the 131 // state healing. 132 // 133 // The extension node is detected if its path is the prefix of last committed 134 // one and path gap is larger than one. If the path gap is only one byte, 135 // the current node could either be a full node, or an extension with single 136 // byte key. In either case, no gaps will be left in the path. 137 if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { 138 for i := len(path) + 1; i < len(t.last); i++ { 139 t.delete(t.last[:i], true) 140 } 141 } 142 t.write(path, blob) 143 144 // Update the last flag. Deep-copy is necessary as the provided path is volatile. 145 if t.last == nil { 146 t.last = append([]byte{}, path...) 147 } else { 148 t.last = append(t.last[:0], path...) 149 } 150 } 151 152 // write commits the node write to provided database batch in path mode. 153 func (t *pathTrie) write(path []byte, blob []byte) { 154 if t.owner == (common.Hash{}) { 155 rawdb.WriteAccountTrieNode(t.batch, path, blob) 156 } else { 157 rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) 158 } 159 } 160 161 func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { 162 if inner { 163 accountInnerLookupGauge.Inc(1) 164 } else { 165 accountOuterLookupGauge.Inc(1) 166 } 167 if !rawdb.HasAccountTrieNode(t.db, path) { 168 return 169 } 170 if inner { 171 accountInnerDeleteGauge.Inc(1) 172 } else { 173 accountOuterDeleteGauge.Inc(1) 174 } 175 rawdb.DeleteAccountTrieNode(t.batch, path) 176 } 177 178 func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { 179 if inner { 180 storageInnerLookupGauge.Inc(1) 181 } else { 182 storageOuterLookupGauge.Inc(1) 183 } 184 if !rawdb.HasStorageTrieNode(t.db, t.owner, path) { 185 return 186 } 187 if inner { 188 storageInnerDeleteGauge.Inc(1) 189 } else { 190 storageOuterDeleteGauge.Inc(1) 191 } 192 
rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) 193 } 194 195 // delete commits the node deletion to provided database batch in path mode. 196 func (t *pathTrie) delete(path []byte, inner bool) { 197 if t.owner == (common.Hash{}) { 198 t.deleteAccountNode(path, inner) 199 } else { 200 t.deleteStorageNode(path, inner) 201 } 202 } 203 204 // update implements genTrie interface, inserting a (key, value) pair into the 205 // stack trie. 206 func (t *pathTrie) update(key, value []byte) error { 207 return t.tr.Update(key, value) 208 } 209 210 // commit implements genTrie interface, flushing the right boundary if it's 211 // considered as complete. Otherwise, the nodes on the right boundary are 212 // discarded and cleaned up. 213 // 214 // Note, this function must be called before flushing database batch, otherwise, 215 // dangling nodes might be left in database. 216 func (t *pathTrie) commit(complete bool) common.Hash { 217 // If the right boundary is claimed as complete, flush them out. 218 // The nodes on both left and right boundary will still be filtered 219 // out if left boundary filtering is configured. 220 if complete { 221 // Commit all inserted but not yet committed nodes(on the right 222 // boundary) in the stackTrie. 223 hash := t.tr.Hash() 224 if t.skipLeftBoundary { 225 return common.Hash{} // hash is meaningless if left side is incomplete 226 } 227 return hash 228 } 229 // Discard nodes on the right boundary as it's claimed as incomplete. These 230 // nodes might be incomplete due to missing children on the right side. 231 // Furthermore, the potential leftover nodes on right boundary should also 232 // be cleaned out. 233 // 234 // The right boundary can be uniquely determined by the last committed node 235 // from stackTrie (e.g., N_1), as the shared path prefix between the last 236 // two inserted state items is deterministic (the path of N_3). 
The path 237 // from trie root towards the last committed node is considered the right 238 // boundary (root to N_3). 239 // 240 // +-----+ 241 // | N_3 | shared path prefix of last two states 242 // +-----+ 243 // /- -\ 244 // +-----+ +-----+ 245 // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) 246 // +-----+ +-----+ 247 // 248 // Another interesting scenario occurs when the trie is committed due to 249 // too many items being accumulated in the batch. To flush them out to 250 // the database, the path of the last inserted node (N_2) is temporarily 251 // treated as an incomplete right boundary, and nodes on this path are 252 // removed (e.g. from root to N_3). 253 // However, this path will be reclaimed as an internal path by inserting 254 // more items after the batch flush. New nodes on this path can be committed 255 // with no issues as they are actually complete. Also, from a database 256 // perspective, first deleting and then rewriting is a valid data update. 257 for i := 0; i < len(t.last); i++ { 258 t.delete(t.last[:i], false) 259 } 260 return common.Hash{} // the hash is meaningless for incomplete commit 261 } 262 263 // hashTrie is a wrapper over the stackTrie for implementing genTrie interface. 264 type hashTrie struct { 265 tr *trie.StackTrie 266 } 267 268 // newHashTrie initializes the hash trie. 269 func newHashTrie(batch ethdb.Batch) *hashTrie { 270 return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { 271 rawdb.WriteLegacyTrieNode(batch, hash, blob) 272 })} 273 } 274 275 // update implements genTrie interface, inserting a (key, value) pair into 276 // the stack trie. 277 func (t *hashTrie) update(key, value []byte) error { 278 return t.tr.Update(key, value) 279 } 280 281 // commit implements genTrie interface, committing the nodes on right boundary. 
282 func (t *hashTrie) commit(complete bool) common.Hash { 283 if !complete { 284 return common.Hash{} // the hash is meaningless for incomplete commit 285 } 286 return t.tr.Hash() // return hash only if it's claimed as complete 287 }