github.com/avahowell/sia@v0.5.1-beta.0.20160524050156-83dcc3d37c94/modules/consensus/synchronize.go

package consensus

import (
	"errors"
	"net"
	"time"

	"github.com/NebulousLabs/Sia/build"
	"github.com/NebulousLabs/Sia/crypto"
	"github.com/NebulousLabs/Sia/encoding"
	"github.com/NebulousLabs/Sia/modules"
	"github.com/NebulousLabs/Sia/types"

	"github.com/NebulousLabs/bolt"
)

const (
	// minNumOutbound is the minimum number of outbound peers required before
	// IBD is confident we are synced.
	minNumOutbound = 5
)

var (
	// MaxCatchUpBlocks is the maximum number of blocks that can be given to
	// the consensus set in a single iteration during the initial blockchain
	// download.
	MaxCatchUpBlocks = func() types.BlockHeight {
		switch build.Release {
		case "dev":
			return 50
		case "standard":
			return 10
		case "testing":
			return 3
		default:
			panic("unrecognized build.Release")
		}
	}()
	// sendBlocksTimeout is the timeout for the SendBlocks RPC.
	sendBlocksTimeout = func() time.Duration {
		switch build.Release {
		case "dev":
			return 40 * time.Second
		case "standard":
			return 5 * time.Minute
		case "testing":
			return 5 * time.Second
		default:
			panic("unrecognized build.Release")
		}
	}()
	// minIBDWaitTime is the time threadedInitialBlockchainDownload waits
	// before exiting if there are >= 1 and <= minNumOutbound peers synced.
	// This timeout will primarily affect miners who have multiple nodes
	// daisy-chained off each other. Those nodes will likely have to wait
	// minIBDWaitTime on every startup before IBD is done.
	minIBDWaitTime = func() time.Duration {
		switch build.Release {
		case "dev":
			return 80 * time.Second
		case "standard":
			return 90 * time.Minute
		case "testing":
			return 10 * time.Second
		default:
			panic("unrecognized build.Release")
		}
	}()
	// ibdLoopDelay is the time that threadedInitialBlockchainDownload waits
	// between attempts to synchronize with the network if the last attempt
	// failed.
	ibdLoopDelay = func() time.Duration {
		switch build.Release {
		case "dev":
			return 1 * time.Second
		case "standard":
			return 10 * time.Second
		case "testing":
			return 100 * time.Millisecond
		default:
			panic("unrecognized build.Release")
		}
	}()

	errSendBlocksStalled = errors.New("SendBlocks RPC timed out and never received any blocks")
)

// blockHistory returns up to 32 block ids, starting with the most recent
// blocks and then proceeding to exponentially older blocks. The genesis
// block is always included as the last block. This block history can be used
// to find a reasonably recent common parent: usually the most recent common
// parent is found, but a common parent within a factor of 2 of the most
// recent is always found.
func blockHistory(tx *bolt.Tx) (blockIDs [32]types.BlockID) {
	height := blockHeight(tx)
	step := types.BlockHeight(1)
	// The final step is to include the genesis block, which is why the final
	// element is skipped during iteration.
	for i := 0; i < 31; i++ {
		// Include the next block.
		blockID, err := getPath(tx, height)
		if build.DEBUG && err != nil {
			panic(err)
		}
		blockIDs[i] = blockID

		// Determine the height of the next block to include and then increase
		// the step size. The height must be decreased first to prevent
		// underflow.
		//
		// `i >= 9` means that the first 10 blocks will be included, and then
		// skipping will start.
		if i >= 9 {
			step *= 2
		}
		if height <= step {
			break
		}
		height -= step
	}
	// Include the genesis block as the last element.
	blockID, err := getPath(tx, 0)
	if build.DEBUG && err != nil {
		panic(err)
	}
	blockIDs[31] = blockID
	return blockIDs
}
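
// exampleBlockHistoryHeights is an illustrative sketch (added for
// clarification; not part of the original file) that mirrors the height
// selection logic of blockHistory without touching the database. For a chain
// at height 1000 it returns:
//
//	1000, 999, ..., 991, 989, 985, 977, 961, 929, 865, 737, 481, 0
//
// i.e. the 10 most recent heights, then exponentially larger gaps back to
// the genesis height.
func exampleBlockHistoryHeights(height types.BlockHeight) []types.BlockHeight {
	heights := make([]types.BlockHeight, 0, 32)
	step := types.BlockHeight(1)
	for i := 0; i < 31; i++ {
		heights = append(heights, height)
		// The first 10 heights are consecutive; after that the gap doubles.
		if i >= 9 {
			step *= 2
		}
		if height <= step {
			break
		}
		height -= step
	}
	// The genesis height is always the final element.
	return append(heights, 0)
}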

// threadedReceiveBlocks is the calling end of the SendBlocks RPC.
func (cs *ConsensusSet) threadedReceiveBlocks(conn modules.PeerConn) (returnErr error) {
	// Set a deadline after which SendBlocks will time out. During IBD
	// especially, SendBlocks is expected to time out. This is by design so
	// that IBD switches peers, preventing any one peer from stalling IBD.
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	// Ignore errors returned by SetDeadline if the conn is a pipe in testing.
	// Pipes do not support Set{,Read,Write}Deadline and should only be used in
	// testing.
	if opErr, ok := err.(*net.OpError); ok && opErr.Op == "set" && opErr.Net == "pipe" && build.Release == "testing" {
		err = nil
	}
	if err != nil {
		return err
	}
	stalled := true
	defer func() {
		// TODO: Timeout errors returned by muxado do not conform to the net.Error
		// interface and therefore we cannot check if the error is a timeout using
		// the Timeout() method. Once muxado issue #14 is resolved change the below
		// condition to:
		//     if netErr, ok := returnErr.(net.Error); ok && netErr.Timeout() && stalled { ... }
		if stalled && returnErr != nil && (returnErr.Error() == "Read timeout" || returnErr.Error() == "Write timeout") {
			returnErr = errSendBlocksStalled
		}
	}()

	// Get blockIDs to send.
	var history [32]types.BlockID
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		history = blockHistory(tx)
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// Send the block ids.
	if err := encoding.WriteObject(conn, history); err != nil {
		return err
	}

	// Broadcast the last block accepted. This functionality is in a defer to
	// ensure that a block is always broadcast if any blocks are accepted. This
	// is to stop an attacker from preventing block broadcasts.
	chainExtended := false
	defer func() {
		if chainExtended && cs.Synced() {
			// The last block received will be the current block since
			// managedAcceptBlock only returns nil if a block extends the longest chain.
			currentBlock := cs.CurrentBlock()
			// COMPATv0.5.1 - broadcast the block to all peers <= v0.5.1 and the block header to all peers > v0.5.1.
			var relayBlockPeers, relayHeaderPeers []modules.Peer
			for _, p := range cs.gateway.Peers() {
				if build.VersionCmp(p.Version, "0.5.1") <= 0 {
					relayBlockPeers = append(relayBlockPeers, p)
				} else {
					relayHeaderPeers = append(relayHeaderPeers, p)
				}
			}
			go cs.gateway.Broadcast("RelayBlock", currentBlock, relayBlockPeers)
			go cs.gateway.Broadcast("RelayHeader", currentBlock.Header(), relayHeaderPeers)
		}
	}()

	// Read blocks off of the wire and add them to the consensus set until
	// there are no more blocks available.
	moreAvailable := true
	for moreAvailable {
		// Read a slice of blocks from the wire.
		var newBlocks []types.Block
		if err := encoding.ReadObject(conn, &newBlocks, uint64(MaxCatchUpBlocks)*types.BlockSizeLimit); err != nil {
			return err
		}
		if err := encoding.ReadObject(conn, &moreAvailable, 1); err != nil {
			return err
		}

		// Integrate the blocks into the consensus set.
		for _, block := range newBlocks {
			stalled = false
			// Call managedAcceptBlock instead of AcceptBlock so as not to broadcast
			// every block.
			acceptErr := cs.managedAcceptBlock(block)
			// Set a flag to indicate that we should broadcast the last block received.
			if acceptErr == nil {
				chainExtended = true
			}
			// ErrNonExtendingBlock must be ignored until headers-first block
			// sharing is implemented; blocks already in the database should
			// also be ignored.
			if acceptErr == modules.ErrNonExtendingBlock || acceptErr == modules.ErrBlockKnown {
				acceptErr = nil
			}
			if acceptErr != nil {
				return acceptErr
			}
		}
	}
	return nil
}
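
// How the two ends of the SendBlocks RPC meet, sketched for clarity (the
// exact registration site is an assumption; it lives elsewhere in this
// package, not in this file):
//
//	// Serving end, registered once at startup:
//	cs.gateway.RegisterRPC("SendBlocks", cs.rpcSendBlocks)
//
//	// Calling end, invoked against a specific peer:
//	err := cs.gateway.RPC(peer.NetAddress, "SendBlocks", cs.threadedReceiveBlocks)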

// rpcSendBlocks is the receiving end of the SendBlocks RPC. It returns a
// sequential set of blocks based on the 32 input block IDs. The most recent
// known ID is used as the starting point, and up to 'MaxCatchUpBlocks' from
// that BlockHeight onwards are returned. It also sends a boolean indicating
// whether more blocks are available.
func (cs *ConsensusSet) rpcSendBlocks(conn modules.PeerConn) error {
	// Read a list of blocks known to the requester and find the most recent
	// block from the current path.
	var knownBlocks [32]types.BlockID
	err := encoding.ReadObject(conn, &knownBlocks, 32*crypto.HashSize)
	if err != nil {
		return err
	}

	// Find the most recent block from knownBlocks in the current path.
	found := false
	var start types.BlockHeight
	var csHeight types.BlockHeight
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		csHeight = blockHeight(tx)
		for _, id := range knownBlocks {
			pb, err := getBlockMap(tx, id)
			if err != nil {
				continue
			}
			pathID, err := getPath(tx, pb.Height)
			if err != nil {
				continue
			}
			if pathID != pb.Block.ID() {
				continue
			}
			if pb.Height == csHeight {
				break
			}
			found = true
			// Start from the child of the common block.
			start = pb.Height + 1
			break
		}
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// If no matching blocks are found, or if the caller has all known blocks,
	// don't send any blocks.
	if !found {
		// Send 0 blocks.
		err = encoding.WriteObject(conn, []types.Block{})
		if err != nil {
			return err
		}
		// Indicate that no more blocks are available.
		return encoding.WriteObject(conn, false)
	}

	// Send the caller all of the blocks that they are missing.
	moreAvailable := true
	for moreAvailable {
		// Get the set of blocks to send.
		var blocks []types.Block
		cs.mu.RLock()
		err = cs.db.View(func(tx *bolt.Tx) error {
			height := blockHeight(tx)
			for i := start; i <= height && i < start+MaxCatchUpBlocks; i++ {
				id, err := getPath(tx, i)
				if build.DEBUG && err != nil {
					panic(err)
				}
				pb, err := getBlockMap(tx, id)
				if build.DEBUG && err != nil {
					panic(err)
				}
				blocks = append(blocks, pb.Block)
			}
			moreAvailable = start+MaxCatchUpBlocks <= height
			start += MaxCatchUpBlocks
			return nil
		})
		cs.mu.RUnlock()
		if err != nil {
			return err
		}

		// Send a set of blocks to the caller + a flag indicating whether more
		// are available.
		if err = encoding.WriteObject(conn, blocks); err != nil {
			return err
		}
		if err = encoding.WriteObject(conn, moreAvailable); err != nil {
			return err
		}
	}

	return nil
}
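
// Wire exchange for SendBlocks, summarized (descriptive note added for
// clarity; it matches the reads and writes in threadedReceiveBlocks and
// rpcSendBlocks above):
//
//	caller -> server: [32]types.BlockID // block history, most recent first
//	server -> caller: []types.Block     // up to MaxCatchUpBlocks blocks
//	server -> caller: bool              // moreAvailable
//
// The last two messages repeat until moreAvailable is false. If no common
// block is found, the server sends an empty slice and false once.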

// rpcRelayBlock is an RPC that accepts a block from a peer.
// COMPATv0.5.1
func (cs *ConsensusSet) rpcRelayBlock(conn modules.PeerConn) error {
	// Decode the block from the connection.
	var b types.Block
	err := encoding.ReadObject(conn, &b, types.BlockSizeLimit)
	if err != nil {
		return err
	}

	// Submit the block to the consensus set and broadcast it.
	err = cs.AcceptBlock(b)
	if err == errOrphan {
		// If the block is an orphan, try to find the parents. The block
		// received from the peer is discarded and will be downloaded again if
		// the parent is found.
		go func() {
			err := cs.gateway.RPC(modules.NetAddress(conn.RemoteAddr().String()), "SendBlocks", cs.threadedReceiveBlocks)
			if err != nil {
				cs.log.Debugln("WARN: failed to get parents of orphan block:", err)
			}
		}()
	}
	if err != nil {
		return err
	}
	return nil
}

// rpcRelayHeader is an RPC that accepts a block header from a peer.
func (cs *ConsensusSet) rpcRelayHeader(conn modules.PeerConn) error {
	// Decode the block header from the connection.
	var h types.BlockHeader
	err := encoding.ReadObject(conn, &h, types.BlockHeaderSize)
	if err != nil {
		return err
	}

	// Start verification inside of a bolt View tx.
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		// Do some relatively inexpensive checks to validate the header.
		return cs.validateHeader(boltTxWrapper{tx}, h)
	})
	cs.mu.RUnlock()
	if err == errOrphan {
		// If the header is an orphan, try to find the parents.
		go func() {
			err := cs.gateway.RPC(modules.NetAddress(conn.RemoteAddr().String()), "SendBlocks", cs.threadedReceiveBlocks)
			if err != nil {
				cs.log.Debugln("WARN: failed to get parents of orphan header:", err)
			}
		}()
		return nil
	} else if err != nil {
		return err
	}
	// If the header is valid and extends the heaviest chain, fetch the
	// corresponding block, accept it, and broadcast it.
	go func() {
		err := cs.gateway.RPC(modules.NetAddress(conn.RemoteAddr().String()), "SendBlk", cs.threadedReceiveBlock(h.ID()))
		if err != nil {
			cs.log.Debugln("WARN: failed to get header's corresponding block:", err)
		}
	}()
	return nil
}
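
// Header relay flow, summarized (descriptive note added for clarity):
//
//	peer sends header -> validateHeader
//	  orphan      -> SendBlocks RPC to fetch the missing history
//	  other error -> reject the header
//	  valid       -> SendBlk RPC (threadedReceiveBlock) to fetch the full
//	                 block, which AcceptBlock then rebroadcasts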

// rpcSendBlk is an RPC that sends the requested block to the requesting peer.
func (cs *ConsensusSet) rpcSendBlk(conn modules.PeerConn) error {
	// Decode the block id from the connection.
	var id types.BlockID
	err := encoding.ReadObject(conn, &id, crypto.HashSize)
	if err != nil {
		return err
	}
	// Look up the corresponding block.
	var b types.Block
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		pb, err := getBlockMap(tx, id)
		if err != nil {
			return err
		}
		b = pb.Block
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}
	// Encode and send the block to the caller.
	err = encoding.WriteObject(conn, b)
	if err != nil {
		return err
	}
	return nil
}

// threadedReceiveBlock takes a block id and returns an RPCFunc that requests
// that block and then calls AcceptBlock on it. The returned function should be
// used as the calling end of the SendBlk RPC. Note that although the function
// itself does not do any locking, it is still prefixed with "threaded" because
// the function it returns calls the exported method AcceptBlock.
func (cs *ConsensusSet) threadedReceiveBlock(id types.BlockID) modules.RPCFunc {
	managedFN := func(conn modules.PeerConn) error {
		if err := encoding.WriteObject(conn, id); err != nil {
			return err
		}
		var block types.Block
		if err := encoding.ReadObject(conn, &block, types.BlockSizeLimit); err != nil {
			return err
		}
		if err := cs.AcceptBlock(block); err != nil {
			return err
		}
		return nil
	}
	return managedFN
}
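
// Example use of threadedReceiveBlock as the calling end of SendBlk, as in
// rpcRelayHeader above (h is a types.BlockHeader received from a peer; the
// variable names are illustrative):
//
//	err := cs.gateway.RPC(addr, "SendBlk", cs.threadedReceiveBlock(h.ID()))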

// threadedInitialBlockchainDownload performs the IBD on outbound peers.
// Blocks are downloaded from one peer at a time, with each attempt bounded by
// sendBlocksTimeout, so as to prevent any one peer from significantly slowing
// down IBD.
//
// NOTE: IBD will succeed right now when each peer has a different blockchain.
// The height and the block id of the remote peers' current blocks are not
// checked to be the same. This can cause issues if you are connected to
// outbound peers <= v0.5.1 that are stalled in IBD.
func (cs *ConsensusSet) threadedInitialBlockchainDownload() {
	// Set the deadline minIBDWaitTime in the future. After this deadline, we
	// will say IBD is done as long as there is at least one outbound peer
	// synced.
	deadline := time.Now().Add(minIBDWaitTime)
	numOutboundSynced := 0
	for {
		numOutboundSynced = 0
		for _, p := range cs.gateway.Peers() {
			// We only sync on outbound peers at first to make IBD less susceptible to
			// fast-mining and other attacks, as outbound peers are more difficult to
			// manipulate.
			if p.Inbound {
				continue
			}

			err := cs.gateway.RPC(p.NetAddress, "SendBlocks", cs.threadedReceiveBlocks)
			if err == nil {
				numOutboundSynced++
				continue
			}
			// TODO: Timeout errors returned by muxado do not conform to the net.Error
			// interface and therefore we cannot check if the error is a timeout using
			// the Timeout() method. Once muxado issue #14 is resolved change the below
			// condition to:
			//     if netErr, ok := returnErr.(net.Error); !ok || !netErr.Timeout() { ... }
			if err.Error() != "Read timeout" && err.Error() != "Write timeout" {
				cs.log.Printf("WARN: disconnecting from peer %v because IBD failed: %v", p.NetAddress, err)
				// Disconnect if there is an unexpected error (not a timeout). This
				// includes errSendBlocksStalled.
				//
				// We disconnect so that these peers are removed from gateway.Peers() and
				// do not prevent us from marking ourselves as fully synced.
				err := cs.gateway.Disconnect(p.NetAddress)
				if err != nil {
					cs.log.Printf("WARN: disconnecting from peer %v failed: %v", p.NetAddress, err)
				}
			}
		}

		// If we have minNumOutbound peers synced, we are done. Otherwise, don't
		// say we are synced until we've been doing IBD for minIBDWaitTime and we
		// are synced with at least one peer.
		if numOutboundSynced >= minNumOutbound || (numOutboundSynced > 0 && time.Now().After(deadline)) {
			break
		} else {
			// Sleep so we don't hammer the network with SendBlocks requests.
			time.Sleep(ibdLoopDelay)
		}
	}

	cs.log.Printf("INFO: IBD done, synced with %v peers", numOutboundSynced)
}
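
// IBD exit conditions, summarized (descriptive note added for clarity): the
// loop above terminates once either
//
//	numOutboundSynced >= minNumOutbound (5 peers), or
//	numOutboundSynced >= 1 && minIBDWaitTime has elapsed since startup.
//
// Unexpected (non-timeout) RPC errors cause a disconnect so that stalled
// peers cannot hold the node in IBD indefinitely.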

// Synced returns true if the consensus set is synced with the network.
func (cs *ConsensusSet) Synced() bool {
	cs.mu.RLock()
	defer cs.mu.RUnlock()
	return cs.synced
}