gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/consensus/synchronize.go

package consensus

import (
	"net"
	"sync"
	"time"

	"gitlab.com/NebulousLabs/errors"

	"gitlab.com/SiaPrime/SiaPrime/build"
	"gitlab.com/SiaPrime/SiaPrime/crypto"
	"gitlab.com/SiaPrime/SiaPrime/encoding"
	"gitlab.com/SiaPrime/SiaPrime/modules"
	"gitlab.com/SiaPrime/SiaPrime/types"

	bolt "github.com/coreos/bbolt"
)

const (
	// minNumOutbound is the minimum number of outbound peers required before IBD
	// is confident we are synced.
	minNumOutbound = 1
)

var (
	errEarlyStop         = errors.New("initial blockchain download did not complete by the time shutdown was issued")
	errNilProcBlock      = errors.New("nil processed block was fetched from the database")
	errSendBlocksStalled = errors.New("SendBlocks RPC timed out and never received any blocks")

	// ibdLoopDelay is the time that threadedInitialBlockchainDownload waits
	// between attempts to synchronize with the network if the last attempt
	// failed.
	ibdLoopDelay = build.Select(build.Var{
		Standard: 10 * time.Second,
		Dev:      1 * time.Second,
		Testing:  100 * time.Millisecond,
	}).(time.Duration)

	// MaxCatchUpBlocks is the maximum number of blocks that can be given to
	// the consensus set in a single iteration during the initial blockchain
	// download.
	MaxCatchUpBlocks = build.Select(build.Var{
		Standard: types.BlockHeight(10),
		Dev:      types.BlockHeight(50),
		Testing:  types.BlockHeight(3),
	}).(types.BlockHeight)

	// minIBDWaitTime is the time threadedInitialBlockchainDownload waits before
	// exiting if there are >= 1 and <= minNumOutbound peers synced. This timeout
	// will primarily affect miners who have multiple nodes daisy-chained off each
	// other. Those nodes will likely have to wait minIBDWaitTime on every startup
	// before IBD is done.
	minIBDWaitTime = build.Select(build.Var{
		Standard: 5 * time.Minute,
		Dev:      1 * time.Second,
		Testing:  1 * time.Second,
	}).(time.Duration)

	// relayHeaderTimeout is the timeout for the RelayHeader RPC.
	relayHeaderTimeout = build.Select(build.Var{
		Standard: 60 * time.Second,
		Dev:      20 * time.Second,
		Testing:  3 * time.Second,
	}).(time.Duration)

	// sendBlkTimeout is the timeout for the SendBlk RPC.
	sendBlkTimeout = build.Select(build.Var{
		Standard: 90 * time.Second,
		Dev:      30 * time.Second,
		Testing:  4 * time.Second,
	}).(time.Duration)

	// sendBlocksTimeout is the timeout for the SendBlocks RPC.
	sendBlocksTimeout = build.Select(build.Var{
		Standard: 180 * time.Second,
		Dev:      40 * time.Second,
		Testing:  5 * time.Second,
	}).(time.Duration)
)

// isTimeoutErr is a helper function that returns true if err was caused by a
// network timeout.
func isTimeoutErr(err error) bool {
	if err == nil {
		return false
	}
	if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
		return true
	}
	// COMPATv1.3.0
	return (err.Error() == "Read timeout" || err.Error() == "Write timeout")
}

// blockHistory returns up to 32 block ids, starting with recent blocks and
// then providing exponentially less recent blocks. The genesis block is
// always included as the last block. This block history can be used to find
// a common parent that is reasonably recent; usually the most recent common
// parent is found, but a common parent within a factor of 2 is always found.
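//
// For example, if the current height is 44, the returned IDs correspond to
// heights 44 through 35 (ten consecutive recent blocks), then 33, 29, 21, 5,
// and finally the genesis block at height 0. Array entries that are never
// filled remain zero block IDs.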
func blockHistory(tx *bolt.Tx) (blockIDs [32]types.BlockID) {
	height := blockHeight(tx)
	step := types.BlockHeight(1)
	// The final step is to include the genesis block, which is why the final
	// element is skipped during iteration.
	for i := 0; i < 31; i++ {
		// Include the next block.
		blockID, err := getPath(tx, height)
		if build.DEBUG && err != nil {
			panic(err)
		}
		blockIDs[i] = blockID

		// Determine the height of the next block to include and then increase
		// the step size. The height must be decreased first to prevent
		// underflow.
		//
		// `i >= 9` means that the first 10 blocks will be included, and then
		// skipping will start.
		if i >= 9 {
			step *= 2
		}
		if height <= step {
			break
		}
		height -= step
	}
	// Include the genesis block as the last element.
	blockID, err := getPath(tx, 0)
	if build.DEBUG && err != nil {
		panic(err)
	}
	blockIDs[31] = blockID
	return blockIDs
}

// managedReceiveBlocks is the calling end of the SendBlocks RPC, without the
// threadgroup wrapping.
func (cs *ConsensusSet) managedReceiveBlocks(conn modules.PeerConn) (returnErr error) {
	// Set a deadline after which SendBlocks will time out. During IBD, especially,
	// SendBlocks will time out. This is by design so that IBD switches peers to
	// prevent any one peer from stalling IBD.
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()

	// Check whether this RPC has timed out with the remote peer at the end of
	// the function, and if so, return a custom error to signal that a new peer
	// needs to be chosen.
	stalled := true
	defer func() {
		if isTimeoutErr(returnErr) && stalled {
			returnErr = errSendBlocksStalled
		}
	}()

	// Get blockIDs to send.
	var history [32]types.BlockID
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		history = blockHistory(tx)
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// Send the block ids.
	if err := encoding.WriteObject(conn, history); err != nil {
		return err
	}

	// Broadcast the last block accepted. This functionality is in a defer to
	// ensure that a block is always broadcast if any blocks are accepted. This
	// is to stop an attacker from preventing block broadcasts.
	var initialBlock types.BlockID
	if build.DEBUG {
		// Prepare for a sanity check on 'chainExtended' - chainExtended should
		// be set to true if and only if the result of calling dbCurrentBlockID
		// changes.
		initialBlock = cs.dbCurrentBlockID()
	}
	chainExtended := false
	defer func() {
		cs.mu.RLock()
		synced := cs.synced
		cs.mu.RUnlock()
		if synced && chainExtended {
			if build.DEBUG && initialBlock == cs.dbCurrentBlockID() {
				panic("blockchain extension reporting is incorrect")
			}
			fullBlock := cs.managedCurrentBlock() // TODO: Add caching, replace this line by looking at the cache.
			go cs.gateway.Broadcast("RelayHeader", fullBlock.Header(), cs.gateway.Peers())
		}
	}()

	// Read blocks off of the wire and add them to the consensus set until
	// there are no more blocks available.
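	//
	// Each iteration of the loop reads one batch from the peer: a slice of at
	// most MaxCatchUpBlocks blocks followed by a boolean indicating whether
	// the peer has more blocks to send. This mirrors the writes performed by
	// rpcSendBlocks on the remote side.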
	moreAvailable := true
	for moreAvailable {
		// Read a slice of blocks from the wire.
		var newBlocks []types.Block
		if err := encoding.ReadObject(conn, &newBlocks, uint64(MaxCatchUpBlocks)*types.BlockSizeLimit); err != nil {
			if err.Error() == "timeout" {
				return errSendBlocksStalled
			}
			return err
		}
		if err := encoding.ReadObject(conn, &moreAvailable, 1); err != nil {
			return err
		}
		if len(newBlocks) == 0 {
			continue
		}
		stalled = false

		// Call managedAcceptBlocks instead of AcceptBlock so as not to broadcast
		// every block.
		extended, acceptErr := cs.managedAcceptBlocks(newBlocks)
		if extended {
			chainExtended = true
		}
		// ErrNonExtendingBlock must be ignored until headers-first block
		// sharing is implemented; blocks already in the database should also
		// be ignored.
		if acceptErr != nil && acceptErr != modules.ErrNonExtendingBlock && acceptErr != modules.ErrBlockKnown {
			return acceptErr
		}
	}
	return nil
}

// threadedReceiveBlocks is the calling end of the SendBlocks RPC.
func (cs *ConsensusSet) threadedReceiveBlocks(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()
	return cs.managedReceiveBlocks(conn)
}

// rpcSendBlocks is the receiving end of the SendBlocks RPC. It returns a
// sequential set of blocks based on the 32 input block IDs. The most recent
// known ID is used as the starting point, and up to 'MaxCatchUpBlocks' from
// that BlockHeight onwards are returned. It also sends a boolean indicating
// whether more blocks are available.
func (cs *ConsensusSet) rpcSendBlocks(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlocksTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()

	// Read a list of blocks known to the requester and find the most recent
	// block from the current path.
	var knownBlocks [32]types.BlockID
	err = encoding.ReadObject(conn, &knownBlocks, 32*crypto.HashSize)
	if err != nil {
		return err
	}

	// Find the most recent block from knownBlocks in the current path.
	found := false
	var start types.BlockHeight
	var csHeight types.BlockHeight
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		csHeight = blockHeight(tx)
		for _, id := range knownBlocks {
			pb, err := getBlockMap(tx, id)
			if err != nil {
				continue
			}
			pathID, err := getPath(tx, pb.Height)
			if err != nil {
				continue
			}
			if pathID != pb.Block.ID() {
				continue
			}
			if pb.Height == csHeight {
				break
			}
			found = true
			// Start from the child of the common block.
			start = pb.Height + 1
			break
		}
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}

	// If no matching blocks are found, or if the caller has all known blocks,
	// don't send any blocks.
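	//
	// The empty reply uses the same wire format as the loop below: a slice of
	// blocks (here zero blocks) followed by a 'false' flag, so the caller's
	// read loop needs no special case for this path.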
	if !found {
		// Send 0 blocks.
		err = encoding.WriteObject(conn, []types.Block{})
		if err != nil {
			return err
		}
		// Indicate that no more blocks are available.
		return encoding.WriteObject(conn, false)
	}

	// Send the caller all of the blocks that they are missing.
	moreAvailable := true
	for moreAvailable {
		// Get the set of blocks to send.
		var blocks []types.Block
		cs.mu.RLock()
		err = cs.db.View(func(tx *bolt.Tx) error {
			height := blockHeight(tx)
			for i := start; i <= height && i < start+MaxCatchUpBlocks; i++ {
				id, err := getPath(tx, i)
				if err != nil {
					cs.log.Critical("Unable to get path: height", height, ":: request", i)
					return err
				}
				pb, err := getBlockMap(tx, id)
				if err != nil {
					cs.log.Critical("Unable to get block from block map: height", height, ":: request", i, ":: id", id)
					return err
				}
				if pb == nil {
					cs.log.Critical("getBlockMap yielded 'nil' block:", height, ":: request", i, ":: id", id)
					return errNilProcBlock
				}
				blocks = append(blocks, pb.Block)
			}
			moreAvailable = start+MaxCatchUpBlocks <= height
			start += MaxCatchUpBlocks
			return nil
		})
		cs.mu.RUnlock()
		if err != nil {
			return err
		}

		// Send a set of blocks to the caller + a flag indicating whether more
		// are available.
		if err = encoding.WriteObject(conn, blocks); err != nil {
			return err
		}
		if err = encoding.WriteObject(conn, moreAvailable); err != nil {
			return err
		}
	}

	return nil
}

// threadedRPCRelayHeader is an RPC that accepts a block header from a peer.
func (cs *ConsensusSet) threadedRPCRelayHeader(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(relayHeaderTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	wg := new(sync.WaitGroup)
	defer func() {
		go func() {
			wg.Wait()
			cs.tg.Done()
		}()
	}()

	// Decode the block header from the connection.
	var h types.BlockHeader
	err = encoding.ReadObject(conn, &h, types.BlockHeaderSize)
	if err != nil {
		return err
	}

	// Start verification inside of a bolt View tx.
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		// Do some relatively inexpensive checks to validate the header
		return cs.validateHeader(boltTxWrapper{tx}, h)
	})
	cs.mu.RUnlock()
	// WARN: orphan multithreading logic (dangerous areas, see below)
	//
	// If the header is valid and extends the heaviest chain, fetch the
	// corresponding block. Call needs to be made in a separate goroutine
	// because an exported call to the gateway is used, which is a deadlock
	// risk given that rpcRelayHeader is called from the gateway.
	//
	// NOTE: In general this is bad design. Rather than recycling other
	// calls, the whole protocol should have been kept in a single RPC.
	// Because it is not, we have to do weird threading to prevent
	// deadlocks, and we also have to be concerned every time the code in
	// managedReceiveBlock is adjusted.
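	//
	// Case #1 below: the header is an orphan, so its ancestry is unknown and
	// the missing chain is requested from the peer via the SendBlocks RPC.
	// Case #2: the header is valid and extends the heaviest chain, so only
	// the corresponding block is requested via the SendBlk RPC.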
	if err == errOrphan { // WARN: orphan multithreading logic case #1
		wg.Add(1)
		go func() {
			defer wg.Done()
			err := cs.gateway.RPC(conn.RPCAddr(), "SendBlocks", cs.managedReceiveBlocks)
			if err != nil {
				cs.log.Debugln("WARN: failed to get parents of orphan header:", err)
			}
		}()
		return nil
	} else if err != nil {
		return err
	}

	// WARN: orphan multithreading logic case #2
	wg.Add(1)
	go func() {
		defer wg.Done()
		err = cs.gateway.RPC(conn.RPCAddr(), "SendBlk", cs.managedReceiveBlock(h.ID()))
		if err != nil {
			cs.log.Debugln("WARN: failed to get header's corresponding block:", err)
		}
	}()
	return nil
}

// rpcSendBlk is an RPC that sends the requested block to the requesting peer.
func (cs *ConsensusSet) rpcSendBlk(conn modules.PeerConn) error {
	err := conn.SetDeadline(time.Now().Add(sendBlkTimeout))
	if err != nil {
		return err
	}
	finishedChan := make(chan struct{})
	defer close(finishedChan)
	go func() {
		select {
		case <-cs.tg.StopChan():
		case <-finishedChan:
		}
		conn.Close()
	}()
	err = cs.tg.Add()
	if err != nil {
		return err
	}
	defer cs.tg.Done()

	// Decode the block id from the connection.
	var id types.BlockID
	err = encoding.ReadObject(conn, &id, crypto.HashSize)
	if err != nil {
		return err
	}
	// Look up the corresponding block.
	var b types.Block
	cs.mu.RLock()
	err = cs.db.View(func(tx *bolt.Tx) error {
		pb, err := getBlockMap(tx, id)
		if err != nil {
			return err
		}
		b = pb.Block
		return nil
	})
	cs.mu.RUnlock()
	if err != nil {
		return err
	}
	// Encode and send the block to the caller.
	err = encoding.WriteObject(conn, b)
	if err != nil {
		return err
	}
	return nil
}

// managedReceiveBlock takes a block id and returns an RPCFunc that requests that
// block and then calls managedAcceptBlocks on it. The returned function should
// be used as the calling end of the SendBlk RPC.
func (cs *ConsensusSet) managedReceiveBlock(id types.BlockID) modules.RPCFunc {
	return func(conn modules.PeerConn) error {
		if err := encoding.WriteObject(conn, id); err != nil {
			return err
		}
		var block types.Block
		if err := encoding.ReadObject(conn, &block, types.BlockSizeLimit); err != nil {
			return err
		}
		chainExtended, err := cs.managedAcceptBlocks([]types.Block{block})
		if chainExtended {
			cs.managedBroadcastBlock(block)
		}
		if err != nil {
			return err
		}
		return nil
	}
}
// threadedInitialBlockchainDownload performs the IBD on outbound peers. Blocks
// are downloaded from one peer at a time in 5 minute intervals, so as to
// prevent any one peer from significantly slowing down IBD.
//
// NOTE: IBD will succeed right now when each peer has a different blockchain.
// The height and the block id of the remote peers' current blocks are not
// checked to be the same. This can cause issues if you are connected to
// outbound peers <= v0.5.1 that are stalled in IBD.
func (cs *ConsensusSet) threadedInitialBlockchainDownload() error {
	// The consensus set will not recognize IBD as complete until it has enough
	// peers. After the deadline though, it will recognize the blockchain
	// download as complete even with only one peer. This deadline is helpful
	// for local-net setups, where a machine will frequently only have one peer
	// (and that peer will be another machine on the same local network, but
	// within the local network at least one peer is connected to the broad
	// network).
	deadline := time.Now().Add(minIBDWaitTime)
	numOutboundSynced := 0
	numOutboundNotSynced := 0
	for {
		numOutboundSynced = 0
		numOutboundNotSynced = 0
		for _, p := range cs.gateway.Peers() {
			// We only sync on outbound peers at first to make IBD less susceptible to
			// fast-mining and other attacks, as outbound peers are more difficult to
			// manipulate.
			if p.Inbound {
				continue
			}

			// Put the rest of the iteration inside of a thread group.
			err := func() error {
				err := cs.tg.Add()
				if err != nil {
					return err
				}
				defer cs.tg.Done()

				// Request blocks from the peer. The error returned will only be
				// 'nil' if there are no more blocks to receive.
				err = cs.gateway.RPC(p.NetAddress, "SendBlocks", cs.managedReceiveBlocks)
				if err == nil {
					numOutboundSynced++
					// In this case, 'return nil' is equivalent to skipping to
					// the next iteration of the loop.
					return nil
				}
				numOutboundNotSynced++
				if !isTimeoutErr(err) {
					cs.log.Printf("WARN: disconnecting from peer %v because IBD failed: %v", p.NetAddress, err)
					// Disconnect if there is an unexpected error (not a timeout). This
					// includes errSendBlocksStalled.
					//
					// We disconnect so that these peers are removed from gateway.Peers() and
					// do not prevent us from marking ourselves as fully synced.
					err := cs.gateway.Disconnect(p.NetAddress)
					if err != nil {
						cs.log.Printf("WARN: disconnecting from peer %v failed: %v", p.NetAddress, err)
					}
				}
				return nil
			}()
			if err != nil {
				return err
			}
		}

		// The consensus set is not considered synced until a majority of
		// outbound peers say that we are synced. Before the minIBDWaitTime
		// deadline has passed, a minimum of 'minNumOutbound' peers must say
		// that we are synced; after the deadline, a 1 vs 0 majority is
		// sufficient.
		//
		// This scheme is used to prevent malicious peers from being able to
		// barricade the synced status of the consensus set, and to make sure
		// that consensus sets behind a firewall with only one peer
		// (potentially a local peer) are still able to eventually conclude
		// that they have synchronized. Miners and hosts will often have setups
		// behind a firewall where there is a single node with many peers and
		// then the rest of the nodes only have a few peers.
		if numOutboundSynced > numOutboundNotSynced && (numOutboundSynced >= minNumOutbound || time.Now().After(deadline)) {
			break
		} else {
			// Sleep so we don't hammer the network with SendBlocks requests.
			time.Sleep(ibdLoopDelay)
		}
	}

	cs.log.Printf("INFO: IBD done, synced with %v peers", numOutboundSynced)
	return nil
}

// Synced returns true if the consensus set is synced with the network.
func (cs *ConsensusSet) Synced() bool {
	err := cs.tg.Add()
	if err != nil {
		return false
	}
	defer cs.tg.Done()
	cs.mu.RLock()
	defer cs.mu.RUnlock()
	return cs.synced
}
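// None of the RPC handlers above are registered in this file. As a rough,
// illustrative sketch (assuming the conventional Sia/SiaPrime wiring rather
// than anything shown here), the consensus set's constructor hooks them into
// the gateway approximately like this:
//
//	gateway.RegisterRPC("SendBlocks", cs.rpcSendBlocks)
//	gateway.RegisterRPC("RelayHeader", cs.threadedRPCRelayHeader)
//	gateway.RegisterRPC("SendBlk", cs.rpcSendBlk)
//	gateway.RegisterConnectCall("SendBlocks", cs.threadedReceiveBlocks)
//
// so that a "SendBlocks" request is also issued to each peer right after
// connecting, with threadedReceiveBlocks acting as the calling end.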