github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/eth/downloader/beaconsync.go

// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package downloader

import (
	"fmt"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/log"
)

// beaconBackfiller is the chain and state backfilling that can be commenced once
// the skeleton syncer has successfully reverse downloaded all the headers up to
// the genesis block or an existing header in the database. Its operation is fully
// directed by the skeleton sync's head/tail events.
type beaconBackfiller struct {
	downloader *Downloader   // Downloader to direct via this callback implementation
	syncMode   SyncMode      // Sync mode to use for backfilling the skeleton chains
	success    func()        // Callback to run on successful sync cycle completion
	filling    bool          // Flag whether the downloader is backfilling or not
	filled     *types.Header // Last header filled by the last terminated sync loop
	started    chan struct{} // Notification channel closed when the downloader inits
	lock       sync.Mutex    // Mutex protecting the fields above
}

// newBeaconBackfiller is a helper method to create the backfiller.
func newBeaconBackfiller(dl *Downloader, success func()) backfiller {
	return &beaconBackfiller{
		downloader: dl,
		success:    success,
	}
}
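
// Editor's note (added): the value returned above satisfies the backfiller
// interface declared in skeleton.go, which (paraphrased) is just:
//
//	type backfiller interface {
//		suspend() *types.Header
//		resume()
//	}
//
// setMode is deliberately not part of that interface, which is why beaconSync
// further below needs a concrete type assertion to reach it.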

// suspend cancels any background downloader threads and returns the last header
// that has been successfully backfilled (potentially in a previous run), or the
// genesis.
func (b *beaconBackfiller) suspend() *types.Header {
	// If no filling is running, don't waste cycles
	b.lock.Lock()
	filling := b.filling
	filled := b.filled
	started := b.started
	b.lock.Unlock()

	if !filling {
		return filled // Return the header filled when the previous sync completed
	}
	// A filling cycle should be running, though it may not have actually started
	// yet (it is launched on a new goroutine). Many concurrent beacon head
	// announcements can lead to sync start/stop thrashing. In that case we need
	// to wait for initialization before we can safely cancel it. It is safe to
	// read this channel multiple times, as it gets closed on startup.
	<-started

	// Now that we're sure the downloader successfully started up, we can cancel
	// it safely without running the risk of data races.
	b.downloader.Cancel()

	// Sync cycle was just terminated, retrieve and return the last filled header.
	// Can't use `filled` as that contains a stale value from before cancellation.
	return b.downloader.blockchain.CurrentSnapBlock()
}
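
// Editor's sketch (added): the closed-channel startup notification used by
// suspend, in isolation. A channel that is closed (never sent on) can be
// received from any number of times by any number of goroutines, which is
// what makes repeated, concurrent suspend calls safe.
func exampleStartedChannel(initialize func()) {
	started := make(chan struct{})
	go func() {
		initialize()
		close(started) // broadcast "initialized" to all current and future readers
	}()
	<-started // blocks until close
	<-started // returns immediately: reads from a closed channel never block
}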

// resume starts the downloader threads for backfilling state and chain data.
func (b *beaconBackfiller) resume() {
	b.lock.Lock()
	if b.filling {
		// If a previous filling cycle is still running, just ignore this start
		// request. // TODO(karalabe): We should make this channel driven
		b.lock.Unlock()
		return
	}
	b.filling = true
	b.filled = nil
	b.started = make(chan struct{})
	mode := b.syncMode
	b.lock.Unlock()

	// Start the backfilling on its own thread since the downloader does not have
	// its own lifecycle runloop.
	go func() {
		// Set the backfiller to non-filling when the download completes
		defer func() {
			b.lock.Lock()
			b.filling = false
			b.filled = b.downloader.blockchain.CurrentSnapBlock()
			b.lock.Unlock()
		}()
		// If the downloader fails, report an error: in beacon chain mode there
		// should be none as long as the chain we're syncing towards is valid.
		if err := b.downloader.synchronise(mode, b.started); err != nil {
			log.Error("Beacon backfilling failed", "err", err)
			return
		}
		// Synchronization succeeded. Since this happens async, notify the outer
		// context to disable snap syncing and enable transaction propagation.
		if b.success != nil {
			b.success()
		}
	}()
}
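
// Editor's sketch (added): the guard pattern resume relies on, in isolation.
// A flag checked and set under a mutex makes concurrent start requests
// idempotent, while the deferred reset clears the flag however the worker
// goroutine exits.
func exampleGuardedStart(lock *sync.Mutex, running *bool, work func()) {
	lock.Lock()
	if *running {
		lock.Unlock()
		return // a cycle is already in flight, ignore the request
	}
	*running = true
	lock.Unlock()

	go func() {
		defer func() {
			lock.Lock()
			*running = false
			lock.Unlock()
		}()
		work()
	}()
}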

// setMode updates the sync mode from the current one to the requested one. If
// there's an active sync in progress, it will be cancelled and restarted.
func (b *beaconBackfiller) setMode(mode SyncMode) {
	// Update the old sync mode and track if it was changed
	b.lock.Lock()
	oldMode := b.syncMode
	updated := oldMode != mode
	filling := b.filling
	b.syncMode = mode
	b.lock.Unlock()

	// If the sync mode was changed mid-sync, restart. This should never ever
	// really happen; we just handle it to detect programming errors.
	if !updated || !filling {
		return
	}
	log.Error("Downloader sync mode changed mid-run", "old", oldMode.String(), "new", mode.String())
	b.suspend()
	b.resume()
}

// SetBadBlockCallback sets the callback to run when a bad block is hit by the
// block processor. This method is not thread safe and should be set only once
// on startup before system events are fired.
func (d *Downloader) SetBadBlockCallback(onBadBlock badBlockFn) {
	d.badBlock = onBadBlock
}

// BeaconSync is the post-merge version of chain synchronization, where the
// chain is not downloaded from genesis onward, but rather backwards from trusted
// head announcements.
//
// Internally backfilling and state sync is done the same way, but the header
// retrieval and scheduling is replaced.
func (d *Downloader) BeaconSync(mode SyncMode, head *types.Header, final *types.Header) error {
	return d.beaconSync(mode, head, final, true)
}

// BeaconExtend is an optimistic version of BeaconSync, where an attempt is made
// to extend the current beacon chain with a new header, but in case of a mismatch,
// the old sync will not be terminated and reorged, rather the new head is dropped.
//
// This is useful if a beacon client is feeding us large chunks of payloads to run,
// but is not setting the head after each one.
func (d *Downloader) BeaconExtend(mode SyncMode, head *types.Header) error {
	return d.beaconSync(mode, head, nil, false)
}
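
// Editor's sketch (added): how an engine-API style caller might drive the two
// entry points above. The driver function itself is hypothetical, not part of
// this package; the point is the asymmetry between the two calls.
func exampleBeaconDriver(d *Downloader, head, final *types.Header) error {
	// On a forkchoice update, force a (re)sync to the announced head: a
	// mismatching local chain gets reorged onto the new one.
	if err := d.BeaconSync(SnapSync, head, final); err != nil {
		return err
	}
	// On a bare payload import far ahead of the head, only attempt an
	// extension: on mismatch the header is dropped and any running sync
	// is left alone.
	return d.BeaconExtend(SnapSync, head)
}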

// beaconSync is the post-merge version of chain synchronization, where the
// chain is not downloaded from genesis onward, but rather backwards from trusted
// head announcements.
//
// Internally backfilling and state sync is done the same way, but the header
// retrieval and scheduling is replaced.
func (d *Downloader) beaconSync(mode SyncMode, head *types.Header, final *types.Header, force bool) error {
	// When the downloader starts a sync cycle, it needs to be aware of the sync
	// mode to use (full, snap). To keep the skeleton chain oblivious, inject the
	// mode into the backfiller directly.
	//
	// Super crazy dangerous type cast. Should be fine (TM), we're only using a
	// different backfiller implementation for skeleton tests.
	d.skeleton.filler.(*beaconBackfiller).setMode(mode)

	// Signal the skeleton sync to switch to a new head, however it wants
	return d.skeleton.Sync(head, final, force)
}

// findBeaconAncestor tries to locate the common ancestor link of the local chain
// and the beacon chain just requested. In the general case when our node was in
// sync and on the correct chain, checking the top N links should already get us
// a match. In the rare scenario when we ended up on a long reorganisation (i.e.
// none of the head links match), we do a binary search to find the ancestor.
func (d *Downloader) findBeaconAncestor() (uint64, error) {
	// Figure out the current local head position
	var chainHead *types.Header

	switch d.getMode() {
	case FullSync:
		chainHead = d.blockchain.CurrentBlock()
	case SnapSync:
		chainHead = d.blockchain.CurrentSnapBlock()
	default:
		chainHead = d.lightchain.CurrentHeader()
	}
	number := chainHead.Number.Uint64()

	// Retrieve the skeleton bounds and ensure they are linked to the local chain
	beaconHead, beaconTail, _, err := d.skeleton.Bounds()
	if err != nil {
		// This is a programming error. The chain backfiller was called with an
		// invalid beacon sync state. Ideally we would panic here, but erroring
		// gives us at least a remote chance to recover. It's still a big fault!
		log.Error("Failed to retrieve beacon bounds", "err", err)
		return 0, err
	}
	var linked bool
	switch d.getMode() {
	case FullSync:
		linked = d.blockchain.HasBlock(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
	case SnapSync:
		linked = d.blockchain.HasFastBlock(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
	default:
		linked = d.blockchain.HasHeader(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
	}
	if !linked {
		// This is a programming error. The chain backfiller was called with a
		// tail that's not linked to the local chain. Whilst this should never
		// happen, there might be some weirdness if beacon sync backfilling
		// races with the user (or beacon client) calling setHead. Whilst a panic
		// would be the ideal thing to do, it is safer long term to attempt a
		// recovery and fix any noticed issues after the fact.
		log.Error("Beacon sync linkup unavailable", "number", beaconTail.Number.Uint64()-1, "hash", beaconTail.ParentHash)
		return 0, fmt.Errorf("beacon linkup unavailable locally: %d [%x]", beaconTail.Number.Uint64()-1, beaconTail.ParentHash)
	}
	// Binary search to find the ancestor
	start, end := beaconTail.Number.Uint64()-1, number
	if number := beaconHead.Number.Uint64(); end > number {
		// This shouldn't really happen in a healthy network, but if the consensus
		// client feeds us a shorter chain as canonical, we should not attempt
		// to access non-existent skeleton items.
		log.Warn("Beacon head lower than local chain", "beacon", number, "local", end)
		end = number
	}
	for start+1 < end {
		// Split our chain interval in two, and request the hash to cross-check
		check := (start + end) / 2

		h := d.skeleton.Header(check)
		n := h.Number.Uint64()

		var known bool
		switch d.getMode() {
		case FullSync:
			known = d.blockchain.HasBlock(h.Hash(), n)
		case SnapSync:
			known = d.blockchain.HasFastBlock(h.Hash(), n)
		default:
			known = d.lightchain.HasHeader(h.Hash(), n)
		}
		if !known {
			end = check
			continue
		}
		start = check
	}
	return start, nil
}
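
// Editor's sketch (added): the loop above is a "find the last known index"
// binary search over the predicate "is this skeleton header present locally",
// assuming the predicate is true up to the common ancestor and false beyond.
// Stripped of the chain plumbing:
func exampleLastKnown(start, end uint64, known func(uint64) bool) uint64 {
	for start+1 < end {
		mid := (start + end) / 2
		if known(mid) {
			start = mid // still on our chain, the ancestor is at mid or later
		} else {
			end = mid // already diverged, the ancestor is before mid
		}
	}
	return start
}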

// fetchHeaders feeds skeleton headers to the downloader queue for scheduling
// until sync errors or is finished.
func (d *Downloader) fetchHeaders(from uint64) error {
	var head *types.Header
	_, tail, _, err := d.skeleton.Bounds()
	if err != nil {
		return err
	}
	// Some of the headers are not in the skeleton space, try to resolve
	// them from the local chain. Note the range should be very short and
	// this should only happen when there are fewer than 64 post-merge
	// blocks in the network.
	var localHeaders []*types.Header
	if from < tail.Number.Uint64() {
		count := tail.Number.Uint64() - from
		if count > uint64(fsMinFullBlocks) {
			return fmt.Errorf("invalid origin (%d) of beacon sync (%d)", from, tail.Number)
		}
		localHeaders = d.readHeaderRange(tail, int(count))
		log.Warn("Retrieved beacon headers from local", "from", from, "count", count)
	}
	fsHeaderContCheckTimer := time.NewTimer(fsHeaderContCheck)
	defer fsHeaderContCheckTimer.Stop()

	for {
		// Some beacon headers might have appeared since the last cycle, make
		// sure we're always syncing to all available ones
		head, _, _, err = d.skeleton.Bounds()
		if err != nil {
			return err
		}
		// If the pivot became stale (older than 2*64-8 blocks, with a bit of
		// wiggle room), move it ahead to HEAD-64
		d.pivotLock.Lock()
		if d.pivotHeader != nil {
			if head.Number.Uint64() > d.pivotHeader.Number.Uint64()+2*uint64(fsMinFullBlocks)-8 {
				// Retrieve the next pivot header, either from the skeleton chain
				// or the filled chain
				number := head.Number.Uint64() - uint64(fsMinFullBlocks)

				log.Warn("Pivot seemingly stale, moving", "old", d.pivotHeader.Number, "new", number)
				if d.pivotHeader = d.skeleton.Header(number); d.pivotHeader == nil {
					if number < tail.Number.Uint64() {
						dist := tail.Number.Uint64() - number
						if len(localHeaders) >= int(dist) {
							d.pivotHeader = localHeaders[dist-1]
							log.Warn("Retrieved pivot header from local", "number", d.pivotHeader.Number, "hash", d.pivotHeader.Hash(), "latest", head.Number, "oldest", tail.Number)
						}
					}
				}
				// Print an error log and return directly in case the pivot header
				// is still not found. It means the skeleton chain is not linked
				// correctly with the local chain.
				if d.pivotHeader == nil {
					log.Error("Pivot header is not found", "number", number)
					d.pivotLock.Unlock()
					return errNoPivotHeader
				}
				// Write out the pivot into the database so a rollback beyond
				// it will re-enable snap sync and update the state root that
				// the state syncer will be downloading
				rawdb.WriteLastPivotNumber(d.stateDB, d.pivotHeader.Number.Uint64())
			}
		}
		d.pivotLock.Unlock()

		// Retrieve a batch of headers and feed it to the header processor
		var (
			headers = make([]*types.Header, 0, maxHeadersProcess)
			hashes  = make([]common.Hash, 0, maxHeadersProcess)
		)
		for i := 0; i < maxHeadersProcess && from <= head.Number.Uint64(); i++ {
			header := d.skeleton.Header(from)

			// The header is not found in the skeleton space, try to find it in
			// the local chain.
			if header == nil && from < tail.Number.Uint64() {
				dist := tail.Number.Uint64() - from
				if len(localHeaders) >= int(dist) {
					header = localHeaders[dist-1]
				}
			}
			// The header is still missing, so the beacon sync is corrupted;
			// bail out with an error here.
			if header == nil {
				return fmt.Errorf("missing beacon header %d", from)
			}
			headers = append(headers, header)
			hashes = append(hashes, header.Hash())
			from++
		}
		if len(headers) > 0 {
			log.Trace("Scheduling new beacon headers", "count", len(headers), "from", from-uint64(len(headers)))
			select {
			case d.headerProcCh <- &headerTask{
				headers: headers,
				hashes:  hashes,
			}:
			case <-d.cancelCh:
				return errCanceled
			}
		}
		// If we still have headers to import, loop and keep pushing them
		if from <= head.Number.Uint64() {
			continue
		}
		// If the pivot block is committed, signal header sync termination
		if d.committed.Load() {
			select {
			case d.headerProcCh <- nil:
				return nil
			case <-d.cancelCh:
				return errCanceled
			}
		}
		// State sync still going, wait a bit for new headers and retry
		log.Trace("Pivot not yet committed, waiting...")
		fsHeaderContCheckTimer.Reset(fsHeaderContCheck)
		select {
		case <-fsHeaderContCheckTimer.C:
		case <-d.cancelCh:
			return errCanceled
		}
	}
}
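
// Editor's sketch (added): the retry skeleton of fetchHeaders' outer loop in
// isolation: a reusable timer rearmed on each idle round, raced against a
// cancellation channel so that shutdown never has to wait out the timeout.
// poll is a hypothetical stand-in for one scheduling pass; it reports done.
func examplePollUntilCancelled(interval time.Duration, cancel <-chan struct{}, poll func() bool) error {
	timer := time.NewTimer(interval)
	defer timer.Stop()

	for {
		if poll() {
			return nil
		}
		timer.Reset(interval)
		select {
		case <-timer.C:
			// interval elapsed, try another pass
		case <-cancel:
			return errCanceled
		}
	}
}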