github.com/NebulousLabs/Sia@v1.3.7/modules/consensus/synchronize.go

package consensus

import (
	"errors"
	"net"
	"sync"
	"time"

	"github.com/NebulousLabs/Sia/build"
	"github.com/NebulousLabs/Sia/crypto"
	"github.com/NebulousLabs/Sia/encoding"
	"github.com/NebulousLabs/Sia/modules"
	"github.com/NebulousLabs/Sia/types"

	"github.com/coreos/bbolt"
)

const (
	// minNumOutbound is the minimum number of outbound peers required before ibd
	// is confident we are synced.
	minNumOutbound = 5
)

var (
	errEarlyStop         = errors.New("initial blockchain download did not complete by the time shutdown was issued")
	errNilProcBlock      = errors.New("nil processed block was fetched from the database")
	errSendBlocksStalled = errors.New("SendBlocks RPC timed out and never received any blocks")

	// ibdLoopDelay is the time that threadedInitialBlockchainDownload waits
	// between attempts to synchronize with the network if the last attempt
	// failed.
	ibdLoopDelay = build.Select(build.Var{
		Standard: 10 * time.Second,
		Dev:      1 * time.Second,
		Testing:  100 * time.Millisecond,
	}).(time.Duration)

	// MaxCatchUpBlocks is the maximum number of blocks that can be given to
	// the consensus set in a single iteration during the initial blockchain
	// download.
	MaxCatchUpBlocks = build.Select(build.Var{
		Standard: types.BlockHeight(10),
		Dev:      types.BlockHeight(50),
		Testing:  types.BlockHeight(3),
	}).(types.BlockHeight)

	// minIBDWaitTime is the time threadedInitialBlockchainDownload waits before
	// exiting if there are >= 1 and <= minNumOutbound peers synced. This timeout
	// will primarily affect miners who have multiple nodes daisy-chained off each
	// other. Those nodes will likely have to wait minIBDWaitTime on every startup
	// before IBD is done.
	minIBDWaitTime = build.Select(build.Var{
		Standard: 90 * time.Minute,
		Dev:      80 * time.Second,
		Testing:  10 * time.Second,
	}).(time.Duration)

	// relayHeaderTimeout is the timeout for the RelayHeader RPC.
	relayHeaderTimeout = build.Select(build.Var{
		Standard: 60 * time.Second,
		Dev:      20 * time.Second,
		Testing:  3 * time.Second,
	}).(time.Duration)

	// sendBlkTimeout is the timeout for the SendBlk RPC.
	sendBlkTimeout = build.Select(build.Var{
		Standard: 90 * time.Second,
		Dev:      30 * time.Second,
		Testing:  4 * time.Second,
	}).(time.Duration)

	// sendBlocksTimeout is the timeout for the SendBlocks RPC.
	sendBlocksTimeout = build.Select(build.Var{
		Standard: 180 * time.Second,
		Dev:      40 * time.Second,
		Testing:  5 * time.Second,
	}).(time.Duration)
)

// isTimeoutErr is a helper function that returns true if err was caused by a
// network timeout.
func isTimeoutErr(err error) bool {
	if err == nil {
		return false
	}
	if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
		return true
	}
	// COMPATv1.3.0
	return (err.Error() == "Read timeout" || err.Error() == "Write timeout")
}

// blockHistory returns up to 32 block ids, starting with recent blocks and
// then providing exponentially less recent blocks. The genesis block is
// always included as the last block. This block history can be used to find
// a common parent that is reasonably recent. Usually the most recent common
// parent is found, but a common parent within a factor of 2 is always found.
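//
// As an illustration of the stepping pattern (derived from the loop below,
// not part of the original comment): with a sufficiently tall chain at
// height H, the selected heights are H, H-1, ..., H-9, then H-11, H-15,
// H-23, H-39, H-71, and so on, doubling the gap for each remaining slot,
// with the genesis block always occupying the final slot.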
func blockHistory(tx *bolt.Tx) (blockIDs [32]types.BlockID) {
	height := blockHeight(tx)
	step := types.BlockHeight(1)
	// The final step is to include the genesis block, which is why the final
	// element is skipped during iteration.
	for i := 0; i < 31; i++ {
		// Include the next block.
		blockID, err := getPath(tx, height)
		if build.DEBUG && err != nil {
			panic(err)
		}
		blockIDs[i] = blockID

		// Determine the height of the next block to include and then increase
		// the step size. The height must be decreased first to prevent
		// underflow.
		//
		// `i >= 9` means that the first 10 blocks will be included, and then
		// skipping will start.
		if i >= 9 {
			step *= 2
		}
		if height <= step {
			break
		}
		height -= step
	}
	// Include the genesis block as the last element.
	blockID, err := getPath(tx, 0)
	if build.DEBUG && err != nil {
		panic(err)
	}
	blockIDs[31] = blockID
	return blockIDs
}

// managedReceiveBlocks is the calling end of the SendBlocks RPC, without the
// threadgroup wrapping.
func (cs *ConsensusSet) managedReceiveBlocks(conn modules.PeerConn) (returnErr error) {
	// Set a deadline after which SendBlocks will time out. During IBD,
	// especially, SendBlocks will time out. This is by design so that IBD
	// switches peers to prevent any one peer from stalling IBD.
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()

	// Check whether this RPC has timed out with the remote peer at the end of
	// the function, and if so, return a custom error to signal that a new peer
	// needs to be chosen.
	stalled := true
	defer func() {
		if isTimeoutErr(returnErr) && stalled {
			returnErr = errSendBlocksStalled
		}
	}()

	// Get blockIDs to send.
	var history [32]types.BlockID
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		history = blockHistory(tx)
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// Send the block ids.
	if err := encoding.WriteObject(conn, history); err != nil {
		return err
	}

	// Broadcast the last block accepted. This functionality is in a defer to
	// ensure that a block is always broadcast if any blocks are accepted. This
	// is to stop an attacker from preventing block broadcasts.
	var initialBlock types.BlockID
	if build.DEBUG {
		// Prepare for a sanity check on 'chainExtended' - chainExtended should
		// be set to true if and only if the result of calling dbCurrentBlockID
		// changes.
		initialBlock = cs.dbCurrentBlockID()
	}
	chainExtended := false
	defer func() {
		cs.mu.RLock()
		synced := cs.synced
		cs.mu.RUnlock()
		if synced && chainExtended {
			if build.DEBUG && initialBlock == cs.dbCurrentBlockID() {
				panic("blockchain extension reporting is incorrect")
			}
			fullBlock := cs.managedCurrentBlock() // TODO: Add caching, replace this line by looking at the cache.
			go cs.gateway.Broadcast("RelayHeader", fullBlock.Header(), cs.gateway.Peers())
		}
	}()

	// Read blocks off of the wire and add them to the consensus set until
	// there are no more blocks available.
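	// Each iteration reads one batch from the peer: a slice of up to
	// MaxCatchUpBlocks blocks followed by a boolean indicating whether the
	// peer has more blocks to send (this mirrors the write loop in
	// rpcSendBlocks below).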
	moreAvailable := true
	for moreAvailable {
		// Read a slice of blocks from the wire.
		var newBlocks []types.Block
		if err := encoding.ReadObject(conn, &newBlocks, uint64(MaxCatchUpBlocks)*types.BlockSizeLimit); err != nil {
			return err
		}
		if err := encoding.ReadObject(conn, &moreAvailable, 1); err != nil {
			return err
		}
		if len(newBlocks) == 0 {
			continue
		}
		stalled = false

		// Call managedAcceptBlocks instead of AcceptBlock so as not to broadcast
		// every block.
		extended, acceptErr := cs.managedAcceptBlocks(newBlocks)
		if extended {
			chainExtended = true
		}
		// ErrNonExtendingBlock must be ignored until headers-first block
		// sharing is implemented, and blocks that are already in the database
		// should also be ignored.
		if acceptErr != nil && acceptErr != modules.ErrNonExtendingBlock && acceptErr != modules.ErrBlockKnown {
			return acceptErr
		}
	}
	return nil
}

// threadedReceiveBlocks is the calling end of the SendBlocks RPC.
func (cs *ConsensusSet) threadedReceiveBlocks(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()
	return cs.managedReceiveBlocks(conn)
}

// rpcSendBlocks is the receiving end of the SendBlocks RPC. It returns a
// sequential set of blocks based on the 32 input block IDs. The most recent
// known ID is used as the starting point, and up to 'MaxCatchUpBlocks' from
// that BlockHeight onwards are returned. It also sends a boolean indicating
// whether more blocks are available.
func (cs *ConsensusSet) rpcSendBlocks(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()

	// Read a list of blocks known to the requester and find the most recent
	// block from the current path.
	var knownBlocks [32]types.BlockID
	err = encoding.ReadObject(conn, &knownBlocks, 32*crypto.HashSize)
	if err != nil {
		return err
	}

	// Find the most recent block from knownBlocks in the current path.
	found := false
	var start types.BlockHeight
	var csHeight types.BlockHeight
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		csHeight = blockHeight(tx)
		for _, id := range knownBlocks {
			pb, err := getBlockMap(tx, id)
			if err != nil {
				continue
			}
			pathID, err := getPath(tx, pb.Height)
			if err != nil {
				continue
			}
			if pathID != pb.Block.ID() {
				continue
			}
			if pb.Height == csHeight {
				break
			}
			found = true
			// Start from the child of the common block.
			start = pb.Height + 1
			break
		}
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// If no matching blocks are found, or if the caller has all known blocks,
	// don't send any blocks.
	if !found {
		// Send 0 blocks.
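		// The reply still follows the normal SendBlocks shape (a block slice
		// followed by a more-available flag) so that the read loop in
		// managedReceiveBlocks terminates cleanly.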
		err = encoding.WriteObject(conn, []types.Block{})
		if err != nil {
			return err
		}
		// Indicate that no more blocks are available.
		return encoding.WriteObject(conn, false)
	}

	// Send the caller all of the blocks that they are missing.
	moreAvailable := true
	for moreAvailable {
		// Get the set of blocks to send.
		var blocks []types.Block
		cs.mu.RLock()
		err = cs.db.View(func(tx *bolt.Tx) error {
			height := blockHeight(tx)
			for i := start; i <= height && i < start+MaxCatchUpBlocks; i++ {
				id, err := getPath(tx, i)
				if err != nil {
					cs.log.Critical("Unable to get path: height", height, ":: request", i)
					return err
				}
				pb, err := getBlockMap(tx, id)
				if err != nil {
					cs.log.Critical("Unable to get block from block map: height", height, ":: request", i, ":: id", id)
					return err
				}
				if pb == nil {
					cs.log.Critical("getBlockMap yielded 'nil' block:", height, ":: request", i, ":: id", id)
					return errNilProcBlock
				}
				blocks = append(blocks, pb.Block)
			}
			moreAvailable = start+MaxCatchUpBlocks <= height
			start += MaxCatchUpBlocks
			return nil
		})
		cs.mu.RUnlock()
		if err != nil {
			return err
		}

		// Send a set of blocks to the caller + a flag indicating whether more
		// are available.
		if err = encoding.WriteObject(conn, blocks); err != nil {
			return err
		}
		if err = encoding.WriteObject(conn, moreAvailable); err != nil {
			return err
		}
	}

	return nil
}

// threadedRPCRelayHeader is an RPC that accepts a block header from a peer.
func (cs *ConsensusSet) threadedRPCRelayHeader(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(relayHeaderTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	wg := new(sync.WaitGroup)
	defer func() {
		go func() {
			wg.Wait()
			cs.tg.Done()
		}()
	}()

	// Decode the block header from the connection.
	var h types.BlockHeader
	err = encoding.ReadObject(conn, &h, types.BlockHeaderSize)
	if err != nil {
		return err
	}

	// Start verification inside of a bolt View tx.
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		// Do some relatively inexpensive checks to validate the header
		return cs.validateHeader(boltTxWrapper{tx}, h)
	})
	cs.mu.RUnlock()
	// WARN: orphan multithreading logic (dangerous areas, see below)
	//
	// If the header is valid and extends the heaviest chain, fetch the
	// corresponding block. Call needs to be made in a separate goroutine
	// because an exported call to the gateway is used, which is a deadlock
	// risk given that rpcRelayHeader is called from the gateway.
	//
	// NOTE: In general this is bad design. Rather than recycling other
	// calls, the whole protocol should have been kept in a single RPC.
	// Because it is not, we have to do weird threading to prevent
	// deadlocks, and we also have to be concerned every time the code in
	// managedReceiveBlock is adjusted.
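	//
	// In both cases below the follow-up RPC runs on a goroutine tracked by
	// wg, and the deferred function above delays cs.tg.Done until wg drains,
	// so shutdown waits for these goroutines to finish.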
	if err == errOrphan { // WARN: orphan multithreading logic case #1
		wg.Add(1)
		go func() {
			defer wg.Done()
			err := cs.gateway.RPC(conn.RPCAddr(), "SendBlocks", cs.managedReceiveBlocks)
			if err != nil {
				cs.log.Debugln("WARN: failed to get parents of orphan header:", err)
			}
		}()
		return nil
	} else if err != nil {
		return err
	}

	// WARN: orphan multithreading logic case #2
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := cs.gateway.RPC(conn.RPCAddr(), "SendBlk", cs.managedReceiveBlock(h.ID()))
		if err != nil {
			cs.log.Debugln("WARN: failed to get header's corresponding block:", err)
		}
	}()
	return nil
}

// rpcSendBlk is an RPC that sends the requested block to the requesting peer.
func (cs *ConsensusSet) rpcSendBlk(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlkTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()

	// Decode the block id from the connection.
	var id types.BlockID
	err = encoding.ReadObject(conn, &id, crypto.HashSize)
	if err != nil {
		return err
	}
	// Look up the corresponding block.
	var b types.Block
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		pb, err := getBlockMap(tx, id)
		if err != nil {
			return err
		}
		b = pb.Block
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}
	// Encode and send the block to the caller.
	err = encoding.WriteObject(conn, b)
	if err != nil {
		return err
	}
	return nil
}

// managedReceiveBlock takes a block id and returns an RPCFunc that requests
// that block and then calls managedAcceptBlocks on it, broadcasting the block
// if it extends the chain. The returned function should be used as the
// calling end of the SendBlk RPC.
func (cs *ConsensusSet) managedReceiveBlock(id types.BlockID) modules.RPCFunc {
	return func(conn modules.PeerConn) error {
		if err := encoding.WriteObject(conn, id); err != nil {
			return err
		}
		var block types.Block
		if err := encoding.ReadObject(conn, &block, types.BlockSizeLimit); err != nil {
			return err
		}
		chainExtended, err := cs.managedAcceptBlocks([]types.Block{block})
		if chainExtended {
			cs.managedBroadcastBlock(block)
		}
		if err != nil {
			return err
		}
		return nil
	}
}

// threadedInitialBlockchainDownload performs the IBD on outbound peers. Blocks
// are downloaded from one peer at a time, with a timeout (sendBlocksTimeout)
// on each attempt, so as to prevent any one peer from significantly slowing
// down IBD.
//
// NOTE: IBD will succeed right now when each peer has a different blockchain.
// The height and the block id of the remote peers' current blocks are not
// checked to be the same. This can cause issues if you are connected to
// outbound peers <= v0.5.1 that are stalled in IBD.
func (cs *ConsensusSet) threadedInitialBlockchainDownload() error {
	// The consensus set will not recognize IBD as complete until it has enough
	// peers. After the deadline though, it will recognize the blockchain
	// download as complete even with only one peer.
	// This deadline is helpful to local-net setups, where a machine will
	// frequently only have one peer (and that peer will be another machine on
	// the same local network, but within the local network at least one peer
	// is connected to the broad network).
	deadline := time.Now().Add(minIBDWaitTime)
	numOutboundSynced := 0
	numOutboundNotSynced := 0
	for {
		numOutboundSynced = 0
		numOutboundNotSynced = 0
		for _, p := range cs.gateway.Peers() {
			// We only sync on outbound peers at first to make IBD less susceptible to
			// fast-mining and other attacks, as outbound peers are more difficult to
			// manipulate.
			if p.Inbound {
				continue
			}

			// Put the rest of the iteration inside of a thread group.
			err := func() error {
				err := cs.tg.Add()
				if err != nil {
					return err
				}
				defer cs.tg.Done()

				// Request blocks from the peer. The error returned will only be
				// 'nil' if there are no more blocks to receive.
				err = cs.gateway.RPC(p.NetAddress, "SendBlocks", cs.managedReceiveBlocks)
				if err == nil {
					numOutboundSynced++
					// In this case, 'return nil' is equivalent to skipping to
					// the next iteration of the loop.
					return nil
				}
				numOutboundNotSynced++
				if !isTimeoutErr(err) {
					cs.log.Printf("WARN: disconnecting from peer %v because IBD failed: %v", p.NetAddress, err)
					// Disconnect if there is an unexpected error (not a timeout). This
					// includes errSendBlocksStalled.
					//
					// We disconnect so that these peers are removed from gateway.Peers() and
					// do not prevent us from marking ourselves as fully synced.
					err := cs.gateway.Disconnect(p.NetAddress)
					if err != nil {
						cs.log.Printf("WARN: disconnecting from peer %v failed: %v", p.NetAddress, err)
					}
				}
				return nil
			}()
			if err != nil {
				return err
			}
		}

		// The consensus set is not considered synced until a majority of
		// outbound peers say that we are synced. If the minIBDWaitTime
		// deadline has not yet passed, a minimum of 'minNumOutbound' peers
		// must say that we are synced, otherwise a 1 vs 0 majority is
		// sufficient.
		//
		// This scheme is used to prevent malicious peers from being able to
		// barricade the sync'd status of the consensus set, and to make sure
		// that consensus sets behind a firewall with only one peer
		// (potentially a local peer) are still able to eventually conclude
		// that they have synchronized. Miners and hosts will often have setups
		// behind a firewall where there is a single node with many peers and
		// then the rest of the nodes only have a few peers.
		if numOutboundSynced > numOutboundNotSynced && (numOutboundSynced >= minNumOutbound || time.Now().After(deadline)) {
			break
		} else {
			// Sleep so we don't hammer the network with SendBlocks requests.
			time.Sleep(ibdLoopDelay)
		}
	}

	cs.log.Printf("INFO: IBD done, synced with %v peers", numOutboundSynced)
	return nil
}

// Synced returns true if the consensus set is synced with the network.
func (cs *ConsensusSet) Synced() bool {
	err := cs.tg.Add()
	if err != nil {
		return false
	}
	defer cs.tg.Done()
	cs.mu.RLock()
	defer cs.mu.RUnlock()
	return cs.synced
}