github.com/humaniq/go-ethereum@v1.6.8-0.20171225131628-061223a13848/trie/sync.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package trie 18 19 import ( 20 "errors" 21 "fmt" 22 23 "github.com/ethereum/go-ethereum/common" 24 "gopkg.in/karalabe/cookiejar.v2/collections/prque" 25 ) 26 27 // ErrNotRequested is returned by the trie sync when it's requested to process a 28 // node it did not request. 29 var ErrNotRequested = errors.New("not requested") 30 31 // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a 32 // node it already processed previously. 33 var ErrAlreadyProcessed = errors.New("already processed") 34 35 // request represents a scheduled or already in-flight state retrieval request. 36 type request struct { 37 hash common.Hash // Hash of the node data content to retrieve 38 data []byte // Data content of the node, cached until all subtrees complete 39 raw bool // Whether this is a raw entry (code) or a trie node 40 41 parents []*request // Parent state nodes referencing this entry (notify all upon completion) 42 depth int // Depth level within the trie the node is located to prioritise DFS 43 deps int // Number of dependencies before allowed to commit this node 44 45 callback TrieSyncLeafCallback // Callback to invoke if a leaf node it reached on this branch 46 } 47 48 // SyncResult is a simple list to return missing nodes along with their request 49 // hashes. 50 type SyncResult struct { 51 Hash common.Hash // Hash of the originally unknown trie node 52 Data []byte // Data content of the retrieved node 53 } 54 55 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet 56 // persisted data items. 57 type syncMemBatch struct { 58 batch map[common.Hash][]byte // In-memory membatch of recently completed items 59 order []common.Hash // Order of completion to prevent out-of-order data loss 60 } 61 62 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. 63 func newSyncMemBatch() *syncMemBatch { 64 return &syncMemBatch{ 65 batch: make(map[common.Hash][]byte), 66 order: make([]common.Hash, 0, 256), 67 } 68 } 69 70 // TrieSyncLeafCallback is a callback type invoked when a trie sync reaches a 71 // leaf node. It's used by state syncing to check if the leaf node requires some 72 // further data syncing. 73 type TrieSyncLeafCallback func(leaf []byte, parent common.Hash) error 74 75 // TrieSync is the main state trie synchronisation scheduler, which provides yet 76 // unknown trie hashes to retrieve, accepts node data associated with said hashes 77 // and reconstructs the trie step by step until all is done. 78 type TrieSync struct { 79 database DatabaseReader // Persistent database to check for existing entries 80 membatch *syncMemBatch // Memory buffer to avoid frequest database writes 81 requests map[common.Hash]*request // Pending requests pertaining to a key hash 82 queue *prque.Prque // Priority queue with the pending requests 83 } 84 85 // NewTrieSync creates a new trie data download scheduler. 86 func NewTrieSync(root common.Hash, database DatabaseReader, callback TrieSyncLeafCallback) *TrieSync { 87 ts := &TrieSync{ 88 database: database, 89 membatch: newSyncMemBatch(), 90 requests: make(map[common.Hash]*request), 91 queue: prque.New(), 92 } 93 ts.AddSubTrie(root, 0, common.Hash{}, callback) 94 return ts 95 } 96 97 // AddSubTrie registers a new trie to the sync code, rooted at the designated parent. 98 func (s *TrieSync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callback TrieSyncLeafCallback) { 99 // Short circuit if the trie is empty or already known 100 if root == emptyRoot { 101 return 102 } 103 if _, ok := s.membatch.batch[root]; ok { 104 return 105 } 106 key := root.Bytes() 107 blob, _ := s.database.Get(key) 108 if local, err := decodeNode(key, blob, 0); local != nil && err == nil { 109 return 110 } 111 // Assemble the new sub-trie sync request 112 req := &request{ 113 hash: root, 114 depth: depth, 115 callback: callback, 116 } 117 // If this sub-trie has a designated parent, link them together 118 if parent != (common.Hash{}) { 119 ancestor := s.requests[parent] 120 if ancestor == nil { 121 panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) 122 } 123 ancestor.deps++ 124 req.parents = append(req.parents, ancestor) 125 } 126 s.schedule(req) 127 } 128 129 // AddRawEntry schedules the direct retrieval of a state entry that should not be 130 // interpreted as a trie node, but rather accepted and stored into the database 131 // as is. This method's goal is to support misc state metadata retrievals (e.g. 132 // contract code). 133 func (s *TrieSync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) { 134 // Short circuit if the entry is empty or already known 135 if hash == emptyState { 136 return 137 } 138 if _, ok := s.membatch.batch[hash]; ok { 139 return 140 } 141 if ok, _ := s.database.Has(hash.Bytes()); ok { 142 return 143 } 144 // Assemble the new sub-trie sync request 145 req := &request{ 146 hash: hash, 147 raw: true, 148 depth: depth, 149 } 150 // If this sub-trie has a designated parent, link them together 151 if parent != (common.Hash{}) { 152 ancestor := s.requests[parent] 153 if ancestor == nil { 154 panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) 155 } 156 ancestor.deps++ 157 req.parents = append(req.parents, ancestor) 158 } 159 s.schedule(req) 160 } 161 162 // Missing retrieves the known missing nodes from the trie for retrieval. 163 func (s *TrieSync) Missing(max int) []common.Hash { 164 requests := []common.Hash{} 165 for !s.queue.Empty() && (max == 0 || len(requests) < max) { 166 requests = append(requests, s.queue.PopItem().(common.Hash)) 167 } 168 return requests 169 } 170 171 // Process injects a batch of retrieved trie nodes data, returning if something 172 // was committed to the database and also the index of an entry if processing of 173 // it failed. 174 func (s *TrieSync) Process(results []SyncResult) (bool, int, error) { 175 committed := false 176 177 for i, item := range results { 178 // If the item was not requested, bail out 179 request := s.requests[item.Hash] 180 if request == nil { 181 return committed, i, ErrNotRequested 182 } 183 if request.data != nil { 184 return committed, i, ErrAlreadyProcessed 185 } 186 // If the item is a raw entry request, commit directly 187 if request.raw { 188 request.data = item.Data 189 s.commit(request) 190 committed = true 191 continue 192 } 193 // Decode the node data content and update the request 194 node, err := decodeNode(item.Hash[:], item.Data, 0) 195 if err != nil { 196 return committed, i, err 197 } 198 request.data = item.Data 199 200 // Create and schedule a request for all the children nodes 201 requests, err := s.children(request, node) 202 if err != nil { 203 return committed, i, err 204 } 205 if len(requests) == 0 && request.deps == 0 { 206 s.commit(request) 207 committed = true 208 continue 209 } 210 request.deps += len(requests) 211 for _, child := range requests { 212 s.schedule(child) 213 } 214 } 215 return committed, 0, nil 216 } 217 218 // Commit flushes the data stored in the internal membatch out to persistent 219 // storage, returning th enumber of items written and any occurred error. 220 func (s *TrieSync) Commit(dbw DatabaseWriter) (int, error) { 221 // Dump the membatch into a database dbw 222 for i, key := range s.membatch.order { 223 if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil { 224 return i, err 225 } 226 } 227 written := len(s.membatch.order) 228 229 // Drop the membatch data and return 230 s.membatch = newSyncMemBatch() 231 return written, nil 232 } 233 234 // Pending returns the number of state entries currently pending for download. 235 func (s *TrieSync) Pending() int { 236 return len(s.requests) 237 } 238 239 // schedule inserts a new state retrieval request into the fetch queue. If there 240 // is already a pending request for this node, the new request will be discarded 241 // and only a parent reference added to the old one. 242 func (s *TrieSync) schedule(req *request) { 243 // If we're already requesting this node, add a new reference and stop 244 if old, ok := s.requests[req.hash]; ok { 245 old.parents = append(old.parents, req.parents...) 246 return 247 } 248 // Schedule the request for future retrieval 249 s.queue.Push(req.hash, float32(req.depth)) 250 s.requests[req.hash] = req 251 } 252 253 // children retrieves all the missing children of a state trie entry for future 254 // retrieval scheduling. 255 func (s *TrieSync) children(req *request, object node) ([]*request, error) { 256 // Gather all the children of the node, irrelevant whether known or not 257 type child struct { 258 node node 259 depth int 260 } 261 children := []child{} 262 263 switch node := (object).(type) { 264 case *shortNode: 265 children = []child{{ 266 node: node.Val, 267 depth: req.depth + len(node.Key), 268 }} 269 case *fullNode: 270 for i := 0; i < 17; i++ { 271 if node.Children[i] != nil { 272 children = append(children, child{ 273 node: node.Children[i], 274 depth: req.depth + 1, 275 }) 276 } 277 } 278 default: 279 panic(fmt.Sprintf("unknown node: %+v", node)) 280 } 281 // Iterate over the children, and request all unknown ones 282 requests := make([]*request, 0, len(children)) 283 for _, child := range children { 284 // Notify any external watcher of a new key/value node 285 if req.callback != nil { 286 if node, ok := (child.node).(valueNode); ok { 287 if err := req.callback(node, req.hash); err != nil { 288 return nil, err 289 } 290 } 291 } 292 // If the child references another node, resolve or schedule 293 if node, ok := (child.node).(hashNode); ok { 294 // Try to resolve the node from the local database 295 hash := common.BytesToHash(node) 296 if _, ok := s.membatch.batch[hash]; ok { 297 continue 298 } 299 if ok, _ := s.database.Has(node); ok { 300 continue 301 } 302 // Locally unknown node, schedule for retrieval 303 requests = append(requests, &request{ 304 hash: hash, 305 parents: []*request{req}, 306 depth: child.depth, 307 callback: req.callback, 308 }) 309 } 310 } 311 return requests, nil 312 } 313 314 // commit finalizes a retrieval request and stores it into the membatch. If any 315 // of the referencing parent requests complete due to this commit, they are also 316 // committed themselves. 317 func (s *TrieSync) commit(req *request) (err error) { 318 // Write the node content to the membatch 319 s.membatch.batch[req.hash] = req.data 320 s.membatch.order = append(s.membatch.order, req.hash) 321 322 delete(s.requests, req.hash) 323 324 // Check all parents for completion 325 for _, parent := range req.parents { 326 parent.deps-- 327 if parent.deps == 0 { 328 if err := s.commit(parent); err != nil { 329 return err 330 } 331 } 332 } 333 return nil 334 }