github.com/aychain/blockbook@v0.1.1-0.20181121092459-6d1fc7e07c5b/db/sync.go (about)

     1  package db
     2  
     3  import (
     4  	"blockbook/bchain"
     5  	"blockbook/common"
     6  	"os"
     7  	"sync"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	"github.com/golang/glog"
    12  	"github.com/juju/errors"
    13  )
    14  
    15  // SyncWorker is handle to SyncWorker
    16  type SyncWorker struct {
    17  	db                     *RocksDB
    18  	chain                  bchain.BlockChain
    19  	syncWorkers, syncChunk int
    20  	dryRun                 bool
    21  	startHeight            uint32
    22  	startHash              string
    23  	chanOsSignal           chan os.Signal
    24  	metrics                *common.Metrics
    25  	is                     *common.InternalState
    26  }
    27  
    28  // NewSyncWorker creates new SyncWorker and returns its handle
    29  func NewSyncWorker(db *RocksDB, chain bchain.BlockChain, syncWorkers, syncChunk int, minStartHeight int, dryRun bool, chanOsSignal chan os.Signal, metrics *common.Metrics, is *common.InternalState) (*SyncWorker, error) {
    30  	if minStartHeight < 0 {
    31  		minStartHeight = 0
    32  	}
    33  	return &SyncWorker{
    34  		db:           db,
    35  		chain:        chain,
    36  		syncWorkers:  syncWorkers,
    37  		syncChunk:    syncChunk,
    38  		dryRun:       dryRun,
    39  		startHeight:  uint32(minStartHeight),
    40  		chanOsSignal: chanOsSignal,
    41  		metrics:      metrics,
    42  		is:           is,
    43  	}, nil
    44  }
    45  
// errSynced is returned by resyncIndex when the local best block hash already equals
// the remote best block hash. ResyncIndex treats it as a successful no-op, not a failure.
var errSynced = errors.New("synced")
    47  
    48  // ResyncIndex synchronizes index to the top of the blockchain
    49  // onNewBlock is called when new block is connected, but not in initial parallel sync
    50  func (w *SyncWorker) ResyncIndex(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error {
    51  	start := time.Now()
    52  	w.is.StartedSync()
    53  
    54  	err := w.resyncIndex(onNewBlock, initialSync)
    55  
    56  	switch err {
    57  	case nil:
    58  		d := time.Since(start)
    59  		glog.Info("resync: finished in ", d)
    60  		w.metrics.IndexResyncDuration.Observe(float64(d) / 1e6) // in milliseconds
    61  		w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk()))
    62  		bh, _, err := w.db.GetBestBlock()
    63  		if err == nil {
    64  			w.is.FinishedSync(bh)
    65  		}
    66  		return nil
    67  	case errSynced:
    68  		// this is not actually error but flag that resync wasn't necessary
    69  		w.is.FinishedSyncNoChange()
    70  		w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk()))
    71  		return nil
    72  	}
    73  
    74  	w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc()
    75  
    76  	return err
    77  }
    78  
    79  func (w *SyncWorker) resyncIndex(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error {
    80  	remoteBestHash, err := w.chain.GetBestBlockHash()
    81  	if err != nil {
    82  		return err
    83  	}
    84  	localBestHeight, localBestHash, err := w.db.GetBestBlock()
    85  	if err != nil {
    86  		return err
    87  	}
    88  	// If the locally indexed block is the same as the best block on the network, we're done.
    89  	if localBestHash == remoteBestHash {
    90  		glog.Infof("resync: synced at %d %s", localBestHeight, localBestHash)
    91  		return errSynced
    92  	}
    93  	if localBestHash != "" {
    94  		remoteHash, err := w.chain.GetBlockHash(localBestHeight)
    95  		// for some coins (eth) remote can be at lower best height after rollback
    96  		if err != nil && err != bchain.ErrBlockNotFound {
    97  			return err
    98  		}
    99  		if remoteHash != localBestHash {
   100  			// forked - the remote hash differs from the local hash at the same height
   101  			glog.Info("resync: local is forked at height ", localBestHeight, ", local hash ", localBestHash, ", remote hash", remoteHash)
   102  			return w.handleFork(localBestHeight, localBestHash, onNewBlock, initialSync)
   103  		}
   104  		glog.Info("resync: local at ", localBestHeight, " is behind")
   105  		w.startHeight = localBestHeight + 1
   106  	} else {
   107  		// database is empty, start genesis
   108  		glog.Info("resync: genesis from block ", w.startHeight)
   109  	}
   110  	w.startHash, err = w.chain.GetBlockHash(w.startHeight)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	// if parallel operation is enabled and the number of blocks to be connected is large,
   115  	// use parallel routine to load majority of blocks
   116  	if w.syncWorkers > 1 {
   117  		remoteBestHeight, err := w.chain.GetBestBlockHeight()
   118  		if err != nil {
   119  			return err
   120  		}
   121  		if remoteBestHeight < w.startHeight {
   122  			glog.Error("resync: error - remote best height ", remoteBestHeight, " less than sync start height ", w.startHeight)
   123  			return errors.New("resync: remote best height error")
   124  		}
   125  		if remoteBestHeight-w.startHeight > uint32(w.syncChunk) {
   126  			glog.Infof("resync: parallel sync of blocks %d-%d, using %d workers", w.startHeight, remoteBestHeight, w.syncWorkers)
   127  			err = w.ConnectBlocksParallel(w.startHeight, remoteBestHeight)
   128  			if err != nil {
   129  				return err
   130  			}
   131  			// after parallel load finish the sync using standard way,
   132  			// new blocks may have been created in the meantime
   133  			return w.resyncIndex(onNewBlock, initialSync)
   134  		}
   135  	}
   136  	return w.connectBlocks(onNewBlock, initialSync)
   137  }
   138  
   139  func (w *SyncWorker) handleFork(localBestHeight uint32, localBestHash string, onNewBlock bchain.OnNewBlockFunc, initialSync bool) error {
   140  	// find forked blocks, disconnect them and then synchronize again
   141  	var height uint32
   142  	hashes := []string{localBestHash}
   143  	for height = localBestHeight - 1; height >= 0; height-- {
   144  		local, err := w.db.GetBlockHash(height)
   145  		if err != nil {
   146  			return err
   147  		}
   148  		if local == "" {
   149  			break
   150  		}
   151  		remote, err := w.chain.GetBlockHash(height)
   152  		// for some coins (eth) remote can be at lower best height after rollback
   153  		if err != nil && err != bchain.ErrBlockNotFound {
   154  			return err
   155  		}
   156  		if local == remote {
   157  			break
   158  		}
   159  		hashes = append(hashes, local)
   160  	}
   161  	if err := w.DisconnectBlocks(height+1, localBestHeight, hashes); err != nil {
   162  		return err
   163  	}
   164  	return w.resyncIndex(onNewBlock, initialSync)
   165  }
   166  
   167  func (w *SyncWorker) connectBlocks(onNewBlock bchain.OnNewBlockFunc, initialSync bool) error {
   168  	bch := make(chan blockResult, 8)
   169  	done := make(chan struct{})
   170  	defer close(done)
   171  
   172  	go w.getBlockChain(bch, done)
   173  
   174  	var lastRes, empty blockResult
   175  
   176  	connect := func(res blockResult) error {
   177  		lastRes = res
   178  		if res.err != nil {
   179  			return res.err
   180  		}
   181  		err := w.db.ConnectBlock(res.block)
   182  		if err != nil {
   183  			return err
   184  		}
   185  		if onNewBlock != nil {
   186  			onNewBlock(res.block.Hash, res.block.Height)
   187  		}
   188  		if res.block.Height > 0 && res.block.Height%1000 == 0 {
   189  			glog.Info("connected block ", res.block.Height, " ", res.block.Hash)
   190  		}
   191  
   192  		return nil
   193  	}
   194  
   195  	if initialSync {
   196  	ConnectLoop:
   197  		for {
   198  			select {
   199  			case <-w.chanOsSignal:
   200  				return errors.Errorf("connectBlocks interrupted at height %d", lastRes.block.Height)
   201  			case res := <-bch:
   202  				if res == empty {
   203  					break ConnectLoop
   204  				}
   205  				err := connect(res)
   206  				if err != nil {
   207  					return err
   208  				}
   209  			}
   210  		}
   211  	} else {
   212  		// while regular sync, OS sig is handled by waitForSignalAndShutdown
   213  		for res := range bch {
   214  			err := connect(res)
   215  			if err != nil {
   216  				return err
   217  			}
   218  		}
   219  	}
   220  
   221  	if lastRes.block != nil {
   222  		glog.Infof("resync: synced at %d %s", lastRes.block.Height, lastRes.block.Hash)
   223  	}
   224  
   225  	return nil
   226  }
   227  
// ConnectBlocksParallel uses parallel goroutines to get data from blockchain daemon.
//
// It connects blocks with heights lower..higher (inclusive). The calling goroutine resolves
// block hashes and feeds them to syncWorkers getBlockWorker goroutines; a single
// writeBlockWorker goroutine writes the fetched blocks through a bulk connect in ascending
// height order. A value on chanOsSignal interrupts the run.
//
// The returned error is nil unless the run was interrupted by a signal: failures of
// GetBlockHash and GetBlock are logged, counted in the resync error metric and retried.
func (w *SyncWorker) ConnectBlocksParallel(lower, higher uint32) error {
	type hashHeight struct {
		hash   string
		height uint32
	}
	var err error
	var wg sync.WaitGroup
	// one unbuffered channel per worker slot; the block of height h travels through
	// bch[h % syncWorkers], which lets the writer pick blocks in height order
	bch := make([]chan *bchain.Block, w.syncWorkers)
	for i := 0; i < w.syncWorkers; i++ {
		bch[i] = make(chan *bchain.Block)
	}
	hch := make(chan hashHeight, w.syncWorkers)
	// hchClosed is set once hch has been closed; workers stuck retrying GetBlock check it to exit
	hchClosed := atomic.Value{}
	hchClosed.Store(false)
	writeBlockDone := make(chan struct{})
	// terminating is closed when a signal interrupts the run
	terminating := make(chan struct{})
	writeBlockWorker := func() {
		defer close(writeBlockDone)
		// NOTE(review): an InitBulkConnect error is only logged and bc is used regardless;
		// confirm that bc is usable in that case
		bc, err := w.db.InitBulkConnect()
		if err != nil {
			glog.Error("sync: InitBulkConnect error ", err)
		}
		// uint32 arithmetic: for lower == 0 this wraps around and lastBlock+1 wraps back to 0
		lastBlock := lower - 1
		keep := uint32(w.chain.GetChainParser().KeepBlockAddresses())
	WriteBlockLoop:
		for {
			select {
			// wait on the channel that carries the next expected height
			case b := <-bch[(lastBlock+1)%uint32(w.syncWorkers)]:
				if b == nil {
					// channel is closed and empty - work is done
					break WriteBlockLoop
				}
				if b.Height != lastBlock+1 {
					glog.Fatal("writeBlockWorker skipped block, expected block ", lastBlock+1, ", new block ", b.Height)
				}
				// the flag is true only for blocks within keep of higher
				// NOTE(review): presumably it asks the bulk connect to store block addresses - confirm
				err := bc.ConnectBlock(b, b.Height+keep > higher)
				if err != nil {
					glog.Fatal("writeBlockWorker ", b.Height, " ", b.Hash, " error ", err)
				}
				lastBlock = b.Height
			case <-terminating:
				break WriteBlockLoop
			}
		}
		// assigns to the err declared in this closure, not to the err returned by ConnectBlocksParallel
		err = bc.Close()
		if err != nil {
			glog.Error("sync: bulkconnect.Close error ", err)
		}
		glog.Info("WriteBlock exiting...")
	}
	getBlockWorker := func(i int) {
		defer wg.Done()
		var err error
		var block *bchain.Block
	GetBlockLoop:
		for hh := range hch {
			// retry GetBlock every 500ms until it succeeds or hch has been closed
			for {
				block, err = w.chain.GetBlock(hh.hash, hh.height)
				if err != nil {
					// signal came while looping in the error loop
					if hchClosed.Load() == true {
						glog.Error("getBlockWorker ", i, " connect block error ", err, ". Exiting...")
						return
					}
					glog.Error("getBlockWorker ", i, " connect block error ", err, ". Retrying...")
					w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc()
					time.Sleep(time.Millisecond * 500)
				} else {
					break
				}
			}
			// in dry run the fetched block is dropped and never reaches the writer
			if w.dryRun {
				continue
			}
			select {
			case bch[hh.height%uint32(w.syncWorkers)] <- block:
			case <-terminating:
				break GetBlockLoop
			}
		}
		glog.Info("getBlockWorker ", i, " exiting...")
	}
	for i := 0; i < w.syncWorkers; i++ {
		wg.Add(1)
		go getBlockWorker(i)
	}
	go writeBlockWorker()
	var hash string
	start := time.Now()
	// memory stats are logged for the first time after 1 minute, then every 10 minutes
	msTime := time.Now().Add(1 * time.Minute)
ConnectLoop:
	for h := lower; h <= higher; {
		select {
		case <-w.chanOsSignal:
			err = errors.Errorf("connectBlocksParallel interrupted at height %d", h)
			// signal all workers to terminate their loops (error loops are interrupted below)
			close(terminating)
			break ConnectLoop
		default:
			hash, err = w.chain.GetBlockHash(h)
			if err != nil {
				// h is not advanced, the same height is retried after a pause
				glog.Error("GetBlockHash error ", err)
				w.metrics.IndexResyncErrors.With(common.Labels{"error": err.Error()}).Inc()
				time.Sleep(time.Millisecond * 500)
				continue
			}
			hch <- hashHeight{hash, h}
			if h > 0 && h%1000 == 0 {
				glog.Info("connecting block ", h, " ", hash, ", elapsed ", time.Since(start), " ", w.db.GetAndResetConnectBlockStats())
				start = time.Now()
			}
			if msTime.Before(time.Now()) {
				glog.Info(w.db.GetMemoryStats())
				w.metrics.IndexDBSize.Set(float64(w.db.DatabaseSizeOnDisk()))
				msTime = time.Now().Add(10 * time.Minute)
			}
			h++
		}
	}
	close(hch)
	// signal stop to workers that are in a error loop
	hchClosed.Store(true)
	// wait for workers and close bch that will stop writer loop
	wg.Wait()
	for i := 0; i < w.syncWorkers; i++ {
		close(bch[i])
	}
	<-writeBlockDone
	return err
}
   359  
// blockResult is the item getBlockChain sends to connectBlocks: either a fetched block
// or the error that stopped fetching. connectBlocks compares it with the zero value
// to detect that the channel has been closed.
type blockResult struct {
	block *bchain.Block
	err   error
}
   364  
   365  func (w *SyncWorker) getBlockChain(out chan blockResult, done chan struct{}) {
   366  	defer close(out)
   367  
   368  	hash := w.startHash
   369  	height := w.startHeight
   370  
   371  	// some coins do not return Next hash
   372  	// must loop until error
   373  	for {
   374  		select {
   375  		case <-done:
   376  			return
   377  		default:
   378  		}
   379  		block, err := w.chain.GetBlock(hash, height)
   380  		if err != nil {
   381  			if err == bchain.ErrBlockNotFound {
   382  				break
   383  			}
   384  			out <- blockResult{err: err}
   385  			return
   386  		}
   387  		hash = block.Next
   388  		height++
   389  		out <- blockResult{block: block}
   390  	}
   391  }
   392  
   393  // DisconnectBlocks removes all data belonging to blocks in range lower-higher,
   394  func (w *SyncWorker) DisconnectBlocks(lower uint32, higher uint32, hashes []string) error {
   395  	glog.Infof("sync: disconnecting blocks %d-%d", lower, higher)
   396  	// if the chain is UTXO, always use DisconnectBlockRange
   397  	if w.chain.GetChainParser().IsUTXOChain() {
   398  		return w.db.DisconnectBlockRangeUTXO(lower, higher)
   399  	}
   400  	blocks := make([]*bchain.Block, len(hashes))
   401  	var err error
   402  	// try to get all blocks first to see if we can avoid full scan
   403  	for i, hash := range hashes {
   404  		blocks[i], err = w.chain.GetBlock(hash, 0)
   405  		if err != nil {
   406  			// cannot get a block, we must do full range scan
   407  			return w.db.DisconnectBlockRangeNonUTXO(lower, higher)
   408  		}
   409  	}
   410  	// got all blocks to be disconnected, disconnect them one after another
   411  	for i, block := range blocks {
   412  		glog.Info("Disconnecting block ", (int(higher) - i), " ", block.Hash)
   413  		if err = w.db.DisconnectBlock(block); err != nil {
   414  			return err
   415  		}
   416  	}
   417  	return nil
   418  }