github.com/ethereum/go-ethereum@v1.16.1/eth/protocols/snap/gentrie.go (about) 1 // Copyright 2024 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package snap 18 19 import ( 20 "bytes" 21 22 "github.com/ethereum/go-ethereum/common" 23 "github.com/ethereum/go-ethereum/core/rawdb" 24 "github.com/ethereum/go-ethereum/ethdb" 25 "github.com/ethereum/go-ethereum/trie" 26 ) 27 28 // genTrie interface is used by the snap syncer to generate merkle tree nodes 29 // based on a received batch of states. 30 type genTrie interface { 31 // update inserts the state item into generator trie. 32 update(key, value []byte) error 33 34 // delete removes the state item from the generator trie. 35 delete(key []byte) error 36 37 // commit flushes the right boundary nodes if complete flag is true. This 38 // function must be called before flushing the associated database batch. 39 commit(complete bool) common.Hash 40 } 41 42 // pathTrie is a wrapper over the stackTrie, incorporating numerous additional 43 // logics to handle the semi-completed trie and potential leftover dangling 44 // nodes in the database. It is utilized for constructing the merkle tree nodes 45 // in path mode during the snap sync process. 46 type pathTrie struct { 47 owner common.Hash // identifier of trie owner, empty for account trie 48 tr *trie.StackTrie // underlying raw stack trie 49 first []byte // the path of first committed node by stackTrie 50 last []byte // the path of last committed node by stackTrie 51 52 // This flag indicates whether nodes on the left boundary are skipped for 53 // committing. If set, the left boundary nodes are considered incomplete 54 // due to potentially missing left children. 55 skipLeftBoundary bool 56 db ethdb.KeyValueReader 57 batch ethdb.Batch 58 } 59 60 // newPathTrie initializes the path trie. 61 func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { 62 tr := &pathTrie{ 63 owner: owner, 64 skipLeftBoundary: skipLeftBoundary, 65 db: db, 66 batch: batch, 67 } 68 tr.tr = trie.NewStackTrie(tr.onTrieNode) 69 return tr 70 } 71 72 // onTrieNode is invoked whenever a new node is committed by the stackTrie. 73 // 74 // As the committed nodes might be incomplete if they are on the boundaries 75 // (left or right), this function has the ability to detect the incomplete 76 // ones and filter them out for committing. 77 // 78 // Additionally, the assumption is made that there may exist leftover dangling 79 // nodes in the database. This function has the ability to detect the dangling 80 // nodes that fall within the path space of committed nodes (specifically on 81 // the path covered by internal extension nodes) and remove them from the 82 // database. This property ensures that the entire path space is uniquely 83 // occupied by committed nodes. 84 // 85 // Furthermore, all leftover dangling nodes along the path from committed nodes 86 // to the trie root (left and right boundaries) should be removed as well; 87 // otherwise, they might potentially disrupt the state healing process. 88 func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { 89 // Filter out the nodes on the left boundary if skipLeftBoundary is 90 // configured. Nodes are considered to be on the left boundary if 91 // it's the first one to be committed, or the parent/ancestor of the 92 // first committed node. 93 if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { 94 if t.first == nil { 95 // Memorize the path of first committed node, which is regarded 96 // as left boundary. Deep-copy is necessary as the path given 97 // is volatile. 98 t.first = append([]byte{}, path...) 99 100 // The left boundary can be uniquely determined by the first committed node 101 // from stackTrie (e.g., N_1), as the shared path prefix between the first 102 // two inserted state items is deterministic (the path of N_3). The path 103 // from trie root towards the first committed node is considered the left 104 // boundary. The potential leftover dangling nodes on left boundary should 105 // be cleaned out. 106 // 107 // +-----+ 108 // | N_3 | shared path prefix of state_1 and state_2 109 // +-----+ 110 // /- -\ 111 // +-----+ +-----+ 112 // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) 113 // +-----+ +-----+ 114 // 115 // The node with the path of the first committed one (e.g, N_1) is not 116 // removed because it's a sibling of the nodes we want to commit, not 117 // the parent or ancestor. 118 for i := 0; i < len(path); i++ { 119 t.deleteNode(path[:i], false) 120 } 121 } 122 return 123 } 124 // If boundary filtering is not configured, or the node is not on the left 125 // boundary, commit it to database. 126 // 127 // Note: If the current committed node is an extension node, then the nodes 128 // falling within the path between itself and its standalone (not embedded 129 // in parent) child should be cleaned out for exclusively occupy the inner 130 // path. 131 // 132 // This is essential in snap sync to avoid leaving dangling nodes within 133 // this range covered by extension node which could potentially break the 134 // state healing. 135 // 136 // The extension node is detected if its path is the prefix of last committed 137 // one and path gap is larger than one. If the path gap is only one byte, 138 // the current node could either be a full node, or an extension with single 139 // byte key. In either case, no gaps will be left in the path. 140 if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { 141 for i := len(path) + 1; i < len(t.last); i++ { 142 t.deleteNode(t.last[:i], true) 143 } 144 } 145 t.write(path, blob) 146 147 // Update the last flag. Deep-copy is necessary as the provided path is volatile. 148 if t.last == nil { 149 t.last = append([]byte{}, path...) 150 } else { 151 t.last = append(t.last[:0], path...) 152 } 153 } 154 155 // write commits the node write to provided database batch in path mode. 156 func (t *pathTrie) write(path []byte, blob []byte) { 157 if t.owner == (common.Hash{}) { 158 rawdb.WriteAccountTrieNode(t.batch, path, blob) 159 } else { 160 rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) 161 } 162 } 163 164 func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { 165 if inner { 166 accountInnerLookupGauge.Inc(1) 167 } else { 168 accountOuterLookupGauge.Inc(1) 169 } 170 if !rawdb.HasAccountTrieNode(t.db, path) { 171 return 172 } 173 if inner { 174 accountInnerDeleteGauge.Inc(1) 175 } else { 176 accountOuterDeleteGauge.Inc(1) 177 } 178 rawdb.DeleteAccountTrieNode(t.batch, path) 179 } 180 181 func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { 182 if inner { 183 storageInnerLookupGauge.Inc(1) 184 } else { 185 storageOuterLookupGauge.Inc(1) 186 } 187 if !rawdb.HasStorageTrieNode(t.db, t.owner, path) { 188 return 189 } 190 if inner { 191 storageInnerDeleteGauge.Inc(1) 192 } else { 193 storageOuterDeleteGauge.Inc(1) 194 } 195 rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) 196 } 197 198 // deleteNode commits the node deletion to provided database batch in path mode. 199 func (t *pathTrie) deleteNode(path []byte, inner bool) { 200 if t.owner == (common.Hash{}) { 201 t.deleteAccountNode(path, inner) 202 } else { 203 t.deleteStorageNode(path, inner) 204 } 205 } 206 207 // update implements genTrie interface, inserting a (key, value) pair into the 208 // stack trie. 209 func (t *pathTrie) update(key, value []byte) error { 210 return t.tr.Update(key, value) 211 } 212 213 // delete implements genTrie interface, deleting the item from the stack trie. 214 func (t *pathTrie) delete(key []byte) error { 215 // Commit the trie since the right boundary is incomplete because 216 // of the deleted item. This will implicitly discard the last inserted 217 // item and clean some ancestor trie nodes of the last committed 218 // item in the database. 219 t.commit(false) 220 221 // Reset the trie and all the internal trackers 222 t.first = nil 223 t.last = nil 224 t.tr.Reset() 225 226 // Explicitly mark the left boundary as incomplete, as the left-side 227 // item of the next one has been deleted. Be aware that the next item 228 // to be inserted will be ignored from committing as well as it's on 229 // the left boundary. 230 t.skipLeftBoundary = true 231 232 // Explicitly delete the potential leftover nodes on the specific 233 // path from the database. 234 tkey := t.tr.TrieKey(key) 235 for i := 0; i <= len(tkey); i++ { 236 t.deleteNode(tkey[:i], false) 237 } 238 return nil 239 } 240 241 // commit implements genTrie interface, flushing the right boundary if it's 242 // considered as complete. Otherwise, the nodes on the right boundary are 243 // discarded and cleaned up. 244 // 245 // Note, this function must be called before flushing database batch, otherwise, 246 // dangling nodes might be left in database. 247 func (t *pathTrie) commit(complete bool) common.Hash { 248 // If the right boundary is claimed as complete, flush them out. 249 // The nodes on both left and right boundary will still be filtered 250 // out if left boundary filtering is configured. 251 if complete { 252 // Commit all inserted but not yet committed nodes(on the right 253 // boundary) in the stackTrie. 254 hash := t.tr.Hash() 255 if t.skipLeftBoundary { 256 return common.Hash{} // hash is meaningless if left side is incomplete 257 } 258 return hash 259 } 260 // Discard nodes on the right boundary as it's claimed as incomplete. These 261 // nodes might be incomplete due to missing children on the right side. 262 // Furthermore, the potential leftover nodes on right boundary should also 263 // be cleaned out. 264 // 265 // The right boundary can be uniquely determined by the last committed node 266 // from stackTrie (e.g., N_1), as the shared path prefix between the last 267 // two inserted state items is deterministic (the path of N_3). The path 268 // from trie root towards the last committed node is considered the right 269 // boundary (root to N_3). 270 // 271 // +-----+ 272 // | N_3 | shared path prefix of last two states 273 // +-----+ 274 // /- -\ 275 // +-----+ +-----+ 276 // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) 277 // +-----+ +-----+ 278 // 279 // Another interesting scenario occurs when the trie is committed due to 280 // too many items being accumulated in the batch. To flush them out to 281 // the database, the path of the last inserted node (N_2) is temporarily 282 // treated as an incomplete right boundary, and nodes on this path are 283 // removed (e.g. from root to N_3). 284 // However, this path will be reclaimed as an internal path by inserting 285 // more items after the batch flush. New nodes on this path can be committed 286 // with no issues as they are actually complete. Also, from a database 287 // perspective, first deleting and then rewriting is a valid data update. 288 for i := 0; i < len(t.last); i++ { 289 t.deleteNode(t.last[:i], false) 290 } 291 return common.Hash{} // the hash is meaningless for incomplete commit 292 } 293 294 // hashTrie is a wrapper over the stackTrie for implementing genTrie interface. 295 type hashTrie struct { 296 tr *trie.StackTrie 297 } 298 299 // newHashTrie initializes the hash trie. 300 func newHashTrie(batch ethdb.Batch) *hashTrie { 301 return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { 302 rawdb.WriteLegacyTrieNode(batch, hash, blob) 303 })} 304 } 305 306 // update implements genTrie interface, inserting a (key, value) pair into 307 // the stack trie. 308 func (t *hashTrie) update(key, value []byte) error { 309 return t.tr.Update(key, value) 310 } 311 312 // delete implements genTrie interface, ignoring the state item for deleting. 313 func (t *hashTrie) delete(key []byte) error { return nil } 314 315 // commit implements genTrie interface, committing the nodes on right boundary. 316 func (t *hashTrie) commit(complete bool) common.Hash { 317 if !complete { 318 return common.Hash{} // the hash is meaningless for incomplete commit 319 } 320 return t.tr.Hash() // return hash only if it's claimed as complete 321 }