github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/iterator_fast.go (about)

     1  // Copyright 2024 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"bytes"
    21  	"cmp"
    22  	"fmt"
    23  	"slices"
    24  	"sort"
    25  
    26  	"github.com/ethereum/go-ethereum/common"
    27  )
    28  
// weightedIterator is an iterator with an assigned weight. It is used to prioritise
// which account or storage slot is the correct one if multiple iterators find the
// same one (modified in multiple consecutive blocks).
type weightedIterator struct {
	it       Iterator // underlying single-layer account or storage iterator
	priority int      // layer depth; lower values (shallower layers) take precedence on clashes
}
    36  
    37  func (it *weightedIterator) Cmp(other *weightedIterator) int {
    38  	// Order the iterators primarily by the account hashes
    39  	hashI := it.it.Hash()
    40  	hashJ := other.it.Hash()
    41  
    42  	switch bytes.Compare(hashI[:], hashJ[:]) {
    43  	case -1:
    44  		return -1
    45  	case 1:
    46  		return 1
    47  	}
    48  	// Same account/storage-slot in multiple layers, split by priority
    49  	return cmp.Compare(it.priority, other.priority)
    50  }
    51  
// fastIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer.
type fastIterator struct {
	curAccount []byte // account blob cached from the head iterator (account mode only)
	curSlot    []byte // slot value cached from the head iterator (storage mode only)

	iterators []*weightedIterator // live per-layer iterators, kept sorted by position then priority
	initiated bool                // whether the initial element (positioned during init) has been consumed
	account   bool                // true when iterating accounts, false when iterating storage slots
	fail      error               // first iteration failure encountered, exposed via Error
}
    63  
// newFastIterator creates a new hierarchical account or storage iterator with one
// element per diff layer. The returned combo iterator can be used to walk over
// the entire layer stack simultaneously.
//
// The seek hash determines the starting position. accountIterator selects the
// mode: true iterates accounts, false iterates the storage slots of the given
// account hash. An error is returned if root does not resolve to a known layer.
func newFastIterator(db *Database, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) {
	current := db.tree.get(root)
	if current == nil {
		return nil, fmt.Errorf("unknown layer: %x", root)
	}
	fi := &fastIterator{
		account: accountIterator,
	}
	// Descend from the requested root towards the disk layer, wiring up one
	// (or for the disk layer, two) iterators per layer. The depth counter
	// doubles as the priority: shallower layers shadow deeper ones on clashes.
	for depth := 0; current != nil; depth++ {
		if accountIterator {
			switch dl := current.(type) {
			case *diskLayer:
				// Ensure no active background buffer flush is in progress, otherwise,
				// part of the state data may become invisible.
				if err := dl.waitFlush(); err != nil {
					return nil, err
				}
				// The state set in the disk layer is mutable, hold the lock before obtaining
				// the account list to prevent concurrent map iteration and write.
				dl.lock.RLock()
				accountList := dl.buffer.states.accountList()
				dl.lock.RUnlock()

				fi.iterators = append(fi.iterators, &weightedIterator{
					// The state set in the disk layer is mutable, and the entire state becomes stale
					// if a diff layer above is merged into it. Therefore, staleness must be checked,
					// and the account data should be retrieved with read lock protection.
					it: newDiffAccountIterator(seek, accountList, func(hash common.Hash) ([]byte, error) {
						dl.lock.RLock()
						defer dl.lock.RUnlock()

						if dl.stale {
							return nil, errSnapshotStale
						}
						return dl.buffer.states.mustAccount(hash)
					}),
					priority: depth,
				})
				// The disk-resident state sits below the buffered state set,
				// so it gets a strictly lower precedence (depth + 1).
				fi.iterators = append(fi.iterators, &weightedIterator{
					it:       newDiskAccountIterator(dl.db.diskdb, seek),
					priority: depth + 1,
				})
			case *diffLayer:
				// The state set in diff layer is immutable and will never be stale,
				// so the read lock protection is unnecessary.
				accountList := dl.states.accountList()
				fi.iterators = append(fi.iterators, &weightedIterator{
					it:       newDiffAccountIterator(seek, accountList, dl.states.mustAccount),
					priority: depth,
				})
			}
		} else {
			switch dl := current.(type) {
			case *diskLayer:
				// Ensure no active background buffer flush is in progress, otherwise,
				// part of the state data may become invisible.
				if err := dl.waitFlush(); err != nil {
					return nil, err
				}
				// The state set in the disk layer is mutable, hold the lock before obtaining
				// the storage list to prevent concurrent map iteration and write.
				dl.lock.RLock()
				storageList := dl.buffer.states.storageList(account)
				dl.lock.RUnlock()

				fi.iterators = append(fi.iterators, &weightedIterator{
					// The state set in the disk layer is mutable, and the entire state becomes stale
					// if a diff layer above is merged into it. Therefore, staleness must be checked,
					// and the storage slot should be retrieved with read lock protection.
					it: newDiffStorageIterator(account, seek, storageList, func(addrHash common.Hash, slotHash common.Hash) ([]byte, error) {
						dl.lock.RLock()
						defer dl.lock.RUnlock()

						if dl.stale {
							return nil, errSnapshotStale
						}
						return dl.buffer.states.mustStorage(addrHash, slotHash)
					}),
					priority: depth,
				})
				// The disk-resident storage sits below the buffered state set,
				// so it gets a strictly lower precedence (depth + 1).
				fi.iterators = append(fi.iterators, &weightedIterator{
					it:       newDiskStorageIterator(dl.db.diskdb, account, seek),
					priority: depth + 1,
				})
			case *diffLayer:
				// The state set in diff layer is immutable and will never be stale,
				// so the read lock protection is unnecessary.
				storageList := dl.states.storageList(account)

				fi.iterators = append(fi.iterators, &weightedIterator{
					it:       newDiffStorageIterator(account, seek, storageList, dl.states.mustStorage),
					priority: depth,
				})
			}
		}
		current = current.parentLayer()
	}
	// Position every iterator, resolve hash clashes and sort the stack.
	fi.init()
	return fi, nil
}
   169  
// init walks over all the iterators and resolves any clashes between them, after
// which it prepares the stack for step-by-step iteration.
func (fi *fastIterator) init() {
	// Track which account hashes are iterators positioned on
	var positioned = make(map[common.Hash]int)

	// Position all iterators and track how many remain live
	for i := 0; i < len(fi.iterators); i++ {
		// Retrieve the first element and if it clashes with a previous iterator,
		// advance either the current one or the old one. Repeat until nothing is
		// clashing anymore.
		it := fi.iterators[i]
		for {
			// If the iterator is exhausted, drop it off the end
			if !it.it.Next() {
				it.it.Release()
				last := len(fi.iterators) - 1

				// Swap-delete: move the last entry into slot i and shrink the
				// slice. Ordering is irrelevant here since a full sort follows.
				fi.iterators[i] = fi.iterators[last]
				fi.iterators[last] = nil
				fi.iterators = fi.iterators[:last]

				// Re-run the outer loop for slot i, which now holds a
				// different, not-yet-positioned iterator.
				i--
				break
			}
			// The iterator is still alive, check for collisions with previous ones
			hash := it.it.Hash()
			if other, exist := positioned[hash]; !exist {
				positioned[hash] = i
				break
			} else {
				// Iterators collide, one needs to be progressed, use priority to
				// determine which.
				//
				// This whole else-block can be avoided, if we instead
				// do an initial priority-sort of the iterators. If we do that,
				// then we'll only wind up here if a lower-priority (preferred) iterator
				// has the same value, and then we will always just continue.
				// However, it costs an extra sort, so it's probably not better
				if fi.iterators[other].priority < it.priority {
					// The 'it' should be progressed
					continue
				} else {
					// The 'other' should be progressed, swap them
					it = fi.iterators[other]
					fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other]
					continue
				}
			}
		}
	}
	// Re-sort the entire list
	slices.SortFunc(fi.iterators, func(a, b *weightedIterator) int { return a.Cmp(b) })
	// Leave initiated unset: the head iterator already sits on its first
	// element, so the first Next call must consume it without advancing.
	fi.initiated = false
}
   225  
   226  // Next steps the iterator forward one element, returning false if exhausted.
   227  func (fi *fastIterator) Next() bool {
   228  	if len(fi.iterators) == 0 {
   229  		return false
   230  	}
   231  	if !fi.initiated {
   232  		// Don't forward first time -- we had to 'Next' once in order to
   233  		// do the sorting already
   234  		fi.initiated = true
   235  		if fi.account {
   236  			fi.curAccount = fi.iterators[0].it.(AccountIterator).Account()
   237  		} else {
   238  			fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot()
   239  		}
   240  		if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
   241  			fi.fail = innerErr
   242  			return false
   243  		}
   244  		if fi.curAccount != nil || fi.curSlot != nil {
   245  			return true
   246  		}
   247  		// Implicit else: we've hit a nil-account or nil-slot, and need to
   248  		// fall through to the loop below to land on something non-nil
   249  	}
   250  	// If an account or a slot is deleted in one of the layers, the key will
   251  	// still be there, but the actual value will be nil. However, the iterator
   252  	// should not export nil-values (but instead simply omit the key), so we
   253  	// need to loop here until we either
   254  	//  - get a non-nil value,
   255  	//  - hit an error,
   256  	//  - or exhaust the iterator
   257  	for {
   258  		if !fi.next(0) {
   259  			return false // exhausted
   260  		}
   261  		if fi.account {
   262  			fi.curAccount = fi.iterators[0].it.(AccountIterator).Account()
   263  		} else {
   264  			fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot()
   265  		}
   266  		if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
   267  			fi.fail = innerErr
   268  			return false // error
   269  		}
   270  		if fi.curAccount != nil || fi.curSlot != nil {
   271  			break // non-nil value found
   272  		}
   273  	}
   274  	return true
   275  }
   276  
// next handles the next operation internally and should be invoked when we know
// that two elements in the list may have the same value.
//
// For example, if the iterated hashes become [2,3,5,5,8,9,10], then we should
// invoke next(3), which will call Next on elem 3 (the second '5') and will
// cascade along the list, applying the same operation if needed.
//
// It returns false only when the whole iterator stack has been exhausted.
func (fi *fastIterator) next(idx int) bool {
	// If this particular iterator got exhausted, remove it and return true (the
	// next one is surely not exhausted yet, otherwise it would have been removed
	// already).
	if it := fi.iterators[idx].it; !it.Next() {
		it.Release()

		fi.iterators = append(fi.iterators[:idx], fi.iterators[idx+1:]...)
		return len(fi.iterators) > 0
	}
	// If there's no one left to cascade into, return
	if idx == len(fi.iterators)-1 {
		return true
	}
	// We next-ed the iterator at 'idx', now we may have to re-sort that element
	var (
		cur, next         = fi.iterators[idx], fi.iterators[idx+1]
		curHash, nextHash = cur.it.Hash(), next.it.Hash()
	)
	if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 {
		// It is still in correct place
		return true
	} else if diff == 0 && cur.priority < next.priority {
		// So still in correct place, but we need to iterate on the next
		// (shadowed) iterator so it moves past the duplicate hash too
		fi.next(idx + 1)
		return true
	}
	// At this point, the iterator is in the wrong location, but the remaining
	// list is sorted. Find out where to move the item.
	clash := -1
	index := sort.Search(len(fi.iterators), func(n int) bool {
		// The iterator always advances forward, so anything before the old slot
		// is known to be behind us, so just skip them altogether. This actually
		// is an important clause since the sort order got invalidated.
		if n < idx {
			return false
		}
		if n == len(fi.iterators)-1 {
			// Can always place an elem last
			return true
		}
		nextHash := fi.iterators[n+1].it.Hash()
		if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 {
			return true
		} else if diff > 0 {
			return false
		}
		// The elem we're placing it next to has the same value,
		// so whichever winds up on n+1 will need further iteration
		clash = n + 1

		return cur.priority < fi.iterators[n+1].priority
	})
	// Slot the iterator into its sorted position, then cascade into whichever
	// iterator ended up sitting on the same hash (if any).
	fi.move(idx, index)
	if clash != -1 {
		fi.next(clash)
	}
	return true
}
   342  
   343  // move advances an iterator to another position in the list.
   344  func (fi *fastIterator) move(index, newpos int) {
   345  	elem := fi.iterators[index]
   346  	copy(fi.iterators[index:], fi.iterators[index+1:newpos+1])
   347  	fi.iterators[newpos] = elem
   348  }
   349  
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
// A nil return means no failure has been recorded so far.
func (fi *fastIterator) Error() error {
	return fi.fail
}
   355  
   356  // Hash returns the current key
   357  func (fi *fastIterator) Hash() common.Hash {
   358  	return fi.iterators[0].it.Hash()
   359  }
   360  
// Account returns the current account blob.
// Note the returned account is not a copy, please don't modify it.
// The value is the one cached by the most recent successful Next call.
func (fi *fastIterator) Account() []byte {
	return fi.curAccount
}
   366  
// Slot returns the current storage slot.
// Note the returned slot is not a copy, please don't modify it.
// The value is the one cached by the most recent successful Next call.
func (fi *fastIterator) Slot() []byte {
	return fi.curSlot
}
   372  
   373  // Release iterates over all the remaining live layer iterators and releases each
   374  // of them individually.
   375  func (fi *fastIterator) Release() {
   376  	for _, it := range fi.iterators {
   377  		it.it.Release()
   378  	}
   379  	fi.iterators = nil
   380  }
   381  
   382  // Debug is a convenience helper during testing
   383  func (fi *fastIterator) Debug() {
   384  	for _, it := range fi.iterators {
   385  		fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Hash()[0])
   386  	}
   387  	fmt.Println()
   388  }
   389  
   390  // newFastAccountIterator creates a new hierarchical account iterator with one
   391  // element per diff layer. The returned combo iterator can be used to walk over
   392  // the entire snapshot diff stack simultaneously.
   393  func newFastAccountIterator(db *Database, root common.Hash, seek common.Hash) (AccountIterator, error) {
   394  	return newFastIterator(db, root, common.Hash{}, seek, true)
   395  }
   396  
   397  // newFastStorageIterator creates a new hierarchical storage iterator with one
   398  // element per diff layer. The returned combo iterator can be used to walk over
   399  // the entire snapshot diff stack simultaneously.
   400  func newFastStorageIterator(db *Database, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
   401  	return newFastIterator(db, root, account, seek, false)
   402  }