github.com/jiajun1992/watercarver@v0.0.0-20191031150618-dfc2b17c0c4a/go-ethereum/trie/sync.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package trie 18 19 import ( 20 "errors" 21 "fmt" 22 23 "github.com/ethereum/go-ethereum/common" 24 "github.com/ethereum/go-ethereum/common/prque" 25 "github.com/ethereum/go-ethereum/ethdb" 26 ) 27 28 // ErrNotRequested is returned by the trie sync when it's requested to process a 29 // node it did not request. 30 var ErrNotRequested = errors.New("not requested") 31 32 // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a 33 // node it already processed previously. 34 var ErrAlreadyProcessed = errors.New("already processed") 35 36 // request represents a scheduled or already in-flight state retrieval request. 37 type request struct { 38 hash common.Hash // Hash of the node data content to retrieve 39 data []byte // Data content of the node, cached until all subtrees complete 40 raw bool // Whether this is a raw entry (code) or a trie node 41 42 parents []*request // Parent state nodes referencing this entry (notify all upon completion) 43 depth int // Depth level within the trie the node is located to prioritise DFS 44 deps int // Number of dependencies before allowed to commit this node 45 46 callback LeafCallback // Callback to invoke if a leaf node it reached on this branch 47 } 48 49 // SyncResult is a simple list to return missing nodes along with their request 50 // hashes. 51 type SyncResult struct { 52 Hash common.Hash // Hash of the originally unknown trie node 53 Data []byte // Data content of the retrieved node 54 } 55 56 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet 57 // persisted data items. 58 type syncMemBatch struct { 59 batch map[common.Hash][]byte // In-memory membatch of recently completed items 60 order []common.Hash // Order of completion to prevent out-of-order data loss 61 } 62 63 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. 64 func newSyncMemBatch() *syncMemBatch { 65 return &syncMemBatch{ 66 batch: make(map[common.Hash][]byte), 67 order: make([]common.Hash, 0, 256), 68 } 69 } 70 71 // Sync is the main state trie synchronisation scheduler, which provides yet 72 // unknown trie hashes to retrieve, accepts node data associated with said hashes 73 // and reconstructs the trie step by step until all is done. 74 type Sync struct { 75 database ethdb.KeyValueReader // Persistent database to check for existing entries 76 membatch *syncMemBatch // Memory buffer to avoid frequent database writes 77 requests map[common.Hash]*request // Pending requests pertaining to a key hash 78 queue *prque.Prque // Priority queue with the pending requests 79 bloom *SyncBloom // Bloom filter for fast node existence checks 80 } 81 82 // NewSync creates a new trie data download scheduler. 83 func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, bloom *SyncBloom) *Sync { 84 ts := &Sync{ 85 database: database, 86 membatch: newSyncMemBatch(), 87 requests: make(map[common.Hash]*request), 88 queue: prque.New(nil), 89 bloom: bloom, 90 } 91 ts.AddSubTrie(root, 0, common.Hash{}, callback) 92 return ts 93 } 94 95 // AddSubTrie registers a new trie to the sync code, rooted at the designated parent. 96 func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callback LeafCallback) { 97 // Short circuit if the trie is empty or already known 98 if root == emptyRoot { 99 return 100 } 101 if _, ok := s.membatch.batch[root]; ok { 102 return 103 } 104 if s.bloom.Contains(root[:]) { 105 // Bloom filter says this might be a duplicate, double check 106 blob, _ := s.database.Get(root[:]) 107 if local, err := decodeNode(root[:], blob); local != nil && err == nil { 108 return 109 } 110 // False positive, bump fault meter 111 bloomFaultMeter.Mark(1) 112 } 113 // Assemble the new sub-trie sync request 114 req := &request{ 115 hash: root, 116 depth: depth, 117 callback: callback, 118 } 119 // If this sub-trie has a designated parent, link them together 120 if parent != (common.Hash{}) { 121 ancestor := s.requests[parent] 122 if ancestor == nil { 123 panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) 124 } 125 ancestor.deps++ 126 req.parents = append(req.parents, ancestor) 127 } 128 s.schedule(req) 129 } 130 131 // AddRawEntry schedules the direct retrieval of a state entry that should not be 132 // interpreted as a trie node, but rather accepted and stored into the database 133 // as is. This method's goal is to support misc state metadata retrievals (e.g. 134 // contract code). 135 func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) { 136 // Short circuit if the entry is empty or already known 137 if hash == emptyState { 138 return 139 } 140 if _, ok := s.membatch.batch[hash]; ok { 141 return 142 } 143 if s.bloom.Contains(hash[:]) { 144 // Bloom filter says this might be a duplicate, double check 145 if ok, _ := s.database.Has(hash[:]); ok { 146 return 147 } 148 // False positive, bump fault meter 149 bloomFaultMeter.Mark(1) 150 } 151 // Assemble the new sub-trie sync request 152 req := &request{ 153 hash: hash, 154 raw: true, 155 depth: depth, 156 } 157 // If this sub-trie has a designated parent, link them together 158 if parent != (common.Hash{}) { 159 ancestor := s.requests[parent] 160 if ancestor == nil { 161 panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) 162 } 163 ancestor.deps++ 164 req.parents = append(req.parents, ancestor) 165 } 166 s.schedule(req) 167 } 168 169 // Missing retrieves the known missing nodes from the trie for retrieval. 170 func (s *Sync) Missing(max int) []common.Hash { 171 var requests []common.Hash 172 for !s.queue.Empty() && (max == 0 || len(requests) < max) { 173 requests = append(requests, s.queue.PopItem().(common.Hash)) 174 } 175 return requests 176 } 177 178 // Process injects a batch of retrieved trie nodes data, returning if something 179 // was committed to the database and also the index of an entry if processing of 180 // it failed. 181 func (s *Sync) Process(results []SyncResult) (bool, int, error) { 182 committed := false 183 184 for i, item := range results { 185 // If the item was not requested, bail out 186 request := s.requests[item.Hash] 187 if request == nil { 188 return committed, i, ErrNotRequested 189 } 190 if request.data != nil { 191 return committed, i, ErrAlreadyProcessed 192 } 193 // If the item is a raw entry request, commit directly 194 if request.raw { 195 request.data = item.Data 196 s.commit(request) 197 committed = true 198 continue 199 } 200 // Decode the node data content and update the request 201 node, err := decodeNode(item.Hash[:], item.Data) 202 if err != nil { 203 return committed, i, err 204 } 205 request.data = item.Data 206 207 // Create and schedule a request for all the children nodes 208 requests, err := s.children(request, node) 209 if err != nil { 210 return committed, i, err 211 } 212 if len(requests) == 0 && request.deps == 0 { 213 s.commit(request) 214 committed = true 215 continue 216 } 217 request.deps += len(requests) 218 for _, child := range requests { 219 s.schedule(child) 220 } 221 } 222 return committed, 0, nil 223 } 224 225 // Commit flushes the data stored in the internal membatch out to persistent 226 // storage, returning the number of items written and any occurred error. 227 func (s *Sync) Commit(dbw ethdb.KeyValueWriter) (int, error) { 228 // Dump the membatch into a database dbw 229 for i, key := range s.membatch.order { 230 if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil { 231 return i, err 232 } 233 s.bloom.Add(key[:]) 234 } 235 written := len(s.membatch.order) // TODO(karalabe): could an order change improve write performance? 236 237 // Drop the membatch data and return 238 s.membatch = newSyncMemBatch() 239 return written, nil 240 } 241 242 // Pending returns the number of state entries currently pending for download. 243 func (s *Sync) Pending() int { 244 return len(s.requests) 245 } 246 247 // schedule inserts a new state retrieval request into the fetch queue. If there 248 // is already a pending request for this node, the new request will be discarded 249 // and only a parent reference added to the old one. 250 func (s *Sync) schedule(req *request) { 251 // If we're already requesting this node, add a new reference and stop 252 if old, ok := s.requests[req.hash]; ok { 253 old.parents = append(old.parents, req.parents...) 254 return 255 } 256 // Schedule the request for future retrieval 257 s.queue.Push(req.hash, int64(req.depth)) 258 s.requests[req.hash] = req 259 } 260 261 // children retrieves all the missing children of a state trie entry for future 262 // retrieval scheduling. 263 func (s *Sync) children(req *request, object node) ([]*request, error) { 264 // Gather all the children of the node, irrelevant whether known or not 265 type child struct { 266 node node 267 depth int 268 } 269 var children []child 270 271 switch node := (object).(type) { 272 case *shortNode: 273 children = []child{{ 274 node: node.Val, 275 depth: req.depth + len(node.Key), 276 }} 277 case *fullNode: 278 for i := 0; i < 17; i++ { 279 if node.Children[i] != nil { 280 children = append(children, child{ 281 node: node.Children[i], 282 depth: req.depth + 1, 283 }) 284 } 285 } 286 default: 287 panic(fmt.Sprintf("unknown node: %+v", node)) 288 } 289 // Iterate over the children, and request all unknown ones 290 requests := make([]*request, 0, len(children)) 291 for _, child := range children { 292 // Notify any external watcher of a new key/value node 293 if req.callback != nil { 294 if node, ok := (child.node).(valueNode); ok { 295 if err := req.callback(node, req.hash); err != nil { 296 return nil, err 297 } 298 } 299 } 300 // If the child references another node, resolve or schedule 301 if node, ok := (child.node).(hashNode); ok { 302 // Try to resolve the node from the local database 303 hash := common.BytesToHash(node) 304 if _, ok := s.membatch.batch[hash]; ok { 305 continue 306 } 307 if s.bloom.Contains(node) { 308 // Bloom filter says this might be a duplicate, double check 309 if ok, _ := s.database.Has(node); ok { 310 continue 311 } 312 // False positive, bump fault meter 313 bloomFaultMeter.Mark(1) 314 } 315 // Locally unknown node, schedule for retrieval 316 requests = append(requests, &request{ 317 hash: hash, 318 parents: []*request{req}, 319 depth: child.depth, 320 callback: req.callback, 321 }) 322 } 323 } 324 return requests, nil 325 } 326 327 // commit finalizes a retrieval request and stores it into the membatch. If any 328 // of the referencing parent requests complete due to this commit, they are also 329 // committed themselves. 330 func (s *Sync) commit(req *request) (err error) { 331 // Write the node content to the membatch 332 s.membatch.batch[req.hash] = req.data 333 s.membatch.order = append(s.membatch.order, req.hash) 334 335 delete(s.requests, req.hash) 336 337 // Check all parents for completion 338 for _, parent := range req.parents { 339 parent.deps-- 340 if parent.deps == 0 { 341 if err := s.commit(parent); err != nil { 342 return err 343 } 344 } 345 } 346 return nil 347 }