github.com/inklabsfoundation/inkchain@v0.17.1-0.20181025012015-c3cef8062f19/gossip/state/state.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package state 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 pb "github.com/golang/protobuf/proto" 28 "github.com/inklabsfoundation/inkchain/core/committer" 29 "github.com/inklabsfoundation/inkchain/gossip/api" 30 "github.com/inklabsfoundation/inkchain/gossip/comm" 31 common2 "github.com/inklabsfoundation/inkchain/gossip/common" 32 "github.com/inklabsfoundation/inkchain/gossip/discovery" 33 "github.com/inklabsfoundation/inkchain/gossip/util" 34 "github.com/inklabsfoundation/inkchain/protos/common" 35 proto "github.com/inklabsfoundation/inkchain/protos/gossip" 36 "github.com/op/go-logging" 37 ) 38 39 // GossipStateProvider is the interface to acquire sequences of the ledger blocks 40 // capable to full fill missing blocks by running state replication and 41 // sending request to get missing block to other nodes 42 type GossipStateProvider interface { 43 // Retrieve block with sequence number equal to index 44 GetBlock(index uint64) *common.Block 45 46 AddPayload(payload *proto.Payload) error 47 48 // Stop terminates state transfer object 49 Stop() 50 } 51 52 const ( 53 defAntiEntropyInterval = 10 * time.Second 54 defAntiEntropyStateResponseTimeout = 3 * time.Second 55 defAntiEntropyBatchSize = 10 56 57 defChannelBufferSize = 100 58 defAntiEntropyMaxRetries = 3 59 60 defMaxBlockDistance = 100 61 ) 62 63 // GossipAdapter defines gossip/communication required interface for state provider 64 type GossipAdapter interface { 65 // Send sends a message to remote peers 66 Send(msg *proto.GossipMessage, peers ...*comm.RemotePeer) 67 68 // Accept returns a dedicated read-only channel for messages sent by other nodes that match a certain predicate. 69 // If passThrough is false, the messages are processed by the gossip layer beforehand. 70 // If passThrough is true, the gossip layer doesn't intervene and the messages 71 // can be used to send a reply back to the sender 72 Accept(acceptor common2.MessageAcceptor, passThrough bool) (<-chan *proto.GossipMessage, <-chan proto.ReceivedMessage) 73 74 // UpdateChannelMetadata updates the self metadata the peer 75 // publishes to other peers about its channel-related state 76 UpdateChannelMetadata(metadata []byte, chainID common2.ChainID) 77 78 // PeersOfChannel returns the NetworkMembers considered alive 79 // and also subscribed to the channel given 80 PeersOfChannel(common2.ChainID) []discovery.NetworkMember 81 } 82 83 // GossipStateProviderImpl the implementation of the GossipStateProvider interface 84 // the struct to handle in memory sliding window of 85 // new ledger block to be acquired by hyper ledger 86 type GossipStateProviderImpl struct { 87 // MessageCryptoService 88 mcs api.MessageCryptoService 89 90 // Chain id 91 chainID string 92 93 // The gossiping service 94 gossip GossipAdapter 95 96 // Channel to read gossip messages from 97 gossipChan <-chan *proto.GossipMessage 98 99 commChan <-chan proto.ReceivedMessage 100 101 // Queue of payloads which wasn't acquired yet 102 payloads PayloadsBuffer 103 104 committer committer.Committer 105 106 stateResponseCh chan proto.ReceivedMessage 107 108 stateRequestCh chan proto.ReceivedMessage 109 110 stopCh chan struct{} 111 112 done sync.WaitGroup 113 114 once sync.Once 115 116 stateTransferActive int32 117 } 118 119 var logger *logging.Logger // package-level logger 120 121 func init() { 122 logger = util.GetLogger(util.LoggingStateModule, "") 123 } 124 125 // NewGossipStateProvider creates initialized instance of gossip state provider 126 func NewGossipStateProvider(chainID string, g GossipAdapter, committer committer.Committer, mcs api.MessageCryptoService) GossipStateProvider { 127 logger := util.GetLogger(util.LoggingStateModule, "") 128 129 gossipChan, _ := g.Accept(func(message interface{}) bool { 130 // Get only data messages 131 return message.(*proto.GossipMessage).IsDataMsg() && 132 bytes.Equal(message.(*proto.GossipMessage).Channel, []byte(chainID)) 133 }, false) 134 135 remoteStateMsgFilter := func(message interface{}) bool { 136 receivedMsg := message.(proto.ReceivedMessage) 137 msg := receivedMsg.GetGossipMessage() 138 if !msg.IsRemoteStateMessage() { 139 return false 140 } 141 // If we're not running with authentication, no point 142 // in enforcing access control 143 if !receivedMsg.GetConnectionInfo().IsAuthenticated() { 144 return true 145 } 146 connInfo := receivedMsg.GetConnectionInfo() 147 authErr := mcs.VerifyByChannel(msg.Channel, connInfo.Identity, connInfo.Auth.Signature, connInfo.Auth.SignedData) 148 if authErr != nil { 149 logger.Warning("Got unauthorized nodeMetastate transfer request from", string(connInfo.Identity)) 150 return false 151 } 152 return true 153 } 154 155 // Filter message which are only relevant for nodeMetastate transfer 156 _, commChan := g.Accept(remoteStateMsgFilter, true) 157 158 height, err := committer.LedgerHeight() 159 if height == 0 { 160 // Panic here since this is an indication of invalid situation which should not happen in normal 161 // code path. 162 logger.Panic("Committer height cannot be zero, ledger should include at least one block (genesis).") 163 } 164 165 if err != nil { 166 logger.Error("Could not read ledger info to obtain current ledger height due to: ", err) 167 // Exiting as without ledger it will be impossible 168 // to deliver new blocks 169 return nil 170 } 171 172 s := &GossipStateProviderImpl{ 173 // MessageCryptoService 174 mcs: mcs, 175 176 // Chain ID 177 chainID: chainID, 178 179 // Instance of the gossip 180 gossip: g, 181 182 // Channel to read new messages from 183 gossipChan: gossipChan, 184 185 // Channel to read direct messages from other peers 186 commChan: commChan, 187 188 // Create a queue for payload received 189 payloads: NewPayloadsBuffer(height), 190 191 committer: committer, 192 193 stateResponseCh: make(chan proto.ReceivedMessage, defChannelBufferSize), 194 195 stateRequestCh: make(chan proto.ReceivedMessage, defChannelBufferSize), 196 197 stopCh: make(chan struct{}, 1), 198 199 stateTransferActive: 0, 200 201 once: sync.Once{}, 202 } 203 204 nodeMetastate := NewNodeMetastate(height - 1) 205 206 logger.Infof("Updating node metadata information, "+ 207 "current ledger sequence is at = %d, next expected block is = %d", nodeMetastate.LedgerHeight, s.payloads.Next()) 208 209 b, err := nodeMetastate.Bytes() 210 if err == nil { 211 logger.Debug("Updating gossip metadate nodeMetastate", nodeMetastate) 212 g.UpdateChannelMetadata(b, common2.ChainID(s.chainID)) 213 } else { 214 logger.Errorf("Unable to serialize node meta nodeMetastate, error = %s", err) 215 } 216 217 s.done.Add(4) 218 219 // Listen for incoming communication 220 go s.listen() 221 // Deliver in order messages into the incoming channel 222 go s.deliverPayloads() 223 // Execute anti entropy to fill missing gaps 224 go s.antiEntropy() 225 // Taking care of state request messages 226 go s.processStateRequests() 227 228 return s 229 } 230 231 func (s *GossipStateProviderImpl) listen() { 232 defer s.done.Done() 233 234 for { 235 select { 236 case msg := <-s.gossipChan: 237 logger.Debug("Received new message via gossip channel") 238 go s.queueNewMessage(msg) 239 case msg := <-s.commChan: 240 logger.Debug("Direct message ", msg) 241 go s.directMessage(msg) 242 case <-s.stopCh: 243 s.stopCh <- struct{}{} 244 logger.Debug("Stop listening for new messages") 245 return 246 } 247 } 248 } 249 250 func (s *GossipStateProviderImpl) directMessage(msg proto.ReceivedMessage) { 251 logger.Debug("[ENTER] -> directMessage") 252 defer logger.Debug("[EXIT] -> directMessage") 253 254 if msg == nil { 255 logger.Error("Got nil message via end-to-end channel, should not happen!") 256 return 257 } 258 259 if !bytes.Equal(msg.GetGossipMessage().Channel, []byte(s.chainID)) { 260 logger.Warning("Received state transfer request for channel", 261 string(msg.GetGossipMessage().Channel), "while expecting channel", s.chainID, "skipping request...") 262 return 263 } 264 265 incoming := msg.GetGossipMessage() 266 267 if incoming.GetStateRequest() != nil { 268 if len(s.stateRequestCh) < defChannelBufferSize { 269 // Forward state request to the channel, if there are too 270 // many message of state request ignore to avoid flooding. 271 s.stateRequestCh <- msg 272 } 273 } else if incoming.GetStateResponse() != nil { 274 // If no state transfer procedure activate there is 275 // no reason to process the message 276 if atomic.LoadInt32(&s.stateTransferActive) == 1 { 277 // Send signal of state response message 278 s.stateResponseCh <- msg 279 } 280 } 281 } 282 283 func (s *GossipStateProviderImpl) processStateRequests() { 284 defer s.done.Done() 285 286 for { 287 select { 288 case msg := <-s.stateRequestCh: 289 s.handleStateRequest(msg) 290 case <-s.stopCh: 291 s.stopCh <- struct{}{} 292 return 293 } 294 } 295 } 296 297 // Handle state request message, validate batch size, read current leader state to 298 // obtain required blocks, build response message and send it back 299 func (s *GossipStateProviderImpl) handleStateRequest(msg proto.ReceivedMessage) { 300 if msg == nil { 301 return 302 } 303 request := msg.GetGossipMessage().GetStateRequest() 304 305 batchSize := request.EndSeqNum - request.StartSeqNum 306 if batchSize > defAntiEntropyBatchSize { 307 logger.Errorf("Requesting blocks batchSize size (%d) greater than configured allowed"+ 308 " (%d) batching for anti-entropy. Ignoring request...", batchSize, defAntiEntropyBatchSize) 309 return 310 } 311 312 if request.StartSeqNum > request.EndSeqNum { 313 logger.Errorf("Invalid sequence interval [%d...%d], ignoring request...", request.StartSeqNum, request.EndSeqNum) 314 return 315 } 316 317 currentHeight, err := s.committer.LedgerHeight() 318 if err != nil { 319 logger.Errorf("Cannot access to current ledger height, due to %s", err) 320 return 321 } 322 if currentHeight < request.EndSeqNum { 323 logger.Warningf("Received state request to transfer blocks with sequence numbers higher [%d...%d] "+ 324 "than available in ledger (%d)", request.StartSeqNum, request.StartSeqNum, currentHeight) 325 } 326 327 endSeqNum := min(currentHeight, request.EndSeqNum) 328 329 response := &proto.RemoteStateResponse{Payloads: make([]*proto.Payload, 0)} 330 for seqNum := request.StartSeqNum; seqNum <= endSeqNum; seqNum++ { 331 logger.Debug("Reading block ", seqNum, " from the committer service") 332 blocks := s.committer.GetBlocks([]uint64{seqNum}) 333 334 if len(blocks) == 0 { 335 logger.Errorf("Wasn't able to read block with sequence number %d from ledger, skipping....", seqNum) 336 continue 337 } 338 339 blockBytes, err := pb.Marshal(blocks[0]) 340 if err != nil { 341 logger.Errorf("Could not marshal block: %s", err) 342 } 343 344 response.Payloads = append(response.Payloads, &proto.Payload{ 345 SeqNum: seqNum, 346 Data: blockBytes, 347 }) 348 } 349 // Sending back response with missing blocks 350 msg.Respond(&proto.GossipMessage{ 351 // Copy nonce field from the request, so it will be possible to match response 352 Nonce: msg.GetGossipMessage().Nonce, 353 Tag: proto.GossipMessage_CHAN_OR_ORG, 354 Channel: []byte(s.chainID), 355 Content: &proto.GossipMessage_StateResponse{response}, 356 }) 357 } 358 359 func (s *GossipStateProviderImpl) handleStateResponse(msg proto.ReceivedMessage) (uint64, error) { 360 max := uint64(0) 361 // Send signal that response for given nonce has been received 362 response := msg.GetGossipMessage().GetStateResponse() 363 // Extract payloads, verify and push into buffer 364 if len(response.GetPayloads()) == 0 { 365 return uint64(0), errors.New("Received state tranfer response without payload") 366 } 367 for _, payload := range response.GetPayloads() { 368 logger.Debugf("Received payload with sequence number %d.", payload.SeqNum) 369 if err := s.mcs.VerifyBlock(common2.ChainID(s.chainID), payload.SeqNum, payload.Data); err != nil { 370 logger.Warningf("Error verifying block with sequence number %d, due to %s", payload.SeqNum, err) 371 return uint64(0), err 372 } 373 if max < payload.SeqNum { 374 max = payload.SeqNum 375 } 376 err := s.payloads.Push(payload) 377 if err != nil { 378 logger.Warningf("Payload with sequence number %d was received earlier", payload.SeqNum) 379 } 380 } 381 return max, nil 382 } 383 384 // Stop function send halting signal to all go routines 385 func (s *GossipStateProviderImpl) Stop() { 386 // Make sure stop won't be executed twice 387 // and stop channel won't be used again 388 s.once.Do(func() { 389 s.stopCh <- struct{}{} 390 // Make sure all go-routines has finished 391 s.done.Wait() 392 // Close all resources 393 s.committer.Close() 394 close(s.stateRequestCh) 395 close(s.stateResponseCh) 396 close(s.stopCh) 397 }) 398 } 399 400 // New message notification/handler 401 func (s *GossipStateProviderImpl) queueNewMessage(msg *proto.GossipMessage) { 402 if !bytes.Equal(msg.Channel, []byte(s.chainID)) { 403 logger.Warning("Received enqueue for channel", 404 string(msg.Channel), "while expecting channel", s.chainID, "ignoring enqueue") 405 return 406 } 407 408 dataMsg := msg.GetDataMsg() 409 if dataMsg != nil { 410 if err := s.AddPayload(dataMsg.GetPayload()); err != nil { 411 logger.Warning("Failed adding payload:", err) 412 return 413 } 414 logger.Debugf("Received new payload with sequence number = [%d]", dataMsg.Payload.SeqNum) 415 } else { 416 logger.Debug("Gossip message received is not of data message type, usually this should not happen.") 417 } 418 } 419 420 func (s *GossipStateProviderImpl) deliverPayloads() { 421 defer s.done.Done() 422 423 for { 424 select { 425 // Wait for notification that next seq has arrived 426 case <-s.payloads.Ready(): 427 logger.Debugf("Ready to transfer payloads to the ledger, next sequence number is = [%d]", s.payloads.Next()) 428 // Collect all subsequent payloads 429 for payload := s.payloads.Pop(); payload != nil; payload = s.payloads.Pop() { 430 rawBlock := &common.Block{} 431 if err := pb.Unmarshal(payload.Data, rawBlock); err != nil { 432 logger.Errorf("Error getting block with seqNum = %d due to (%s)...dropping block", payload.SeqNum, err) 433 continue 434 } 435 if rawBlock.Data == nil || rawBlock.Header == nil { 436 logger.Errorf("Block with claimed sequence %d has no header (%v) or data (%v)", 437 payload.SeqNum, rawBlock.Header, rawBlock.Data) 438 continue 439 } 440 logger.Debug("New block with claimed sequence number ", payload.SeqNum, " transactions num ", len(rawBlock.Data.Data)) 441 if err := s.commitBlock(rawBlock); err != nil { 442 logger.Panicf("Cannot commit block to the ledger due to %s", err) 443 } 444 } 445 case <-s.stopCh: 446 s.stopCh <- struct{}{} 447 logger.Debug("State provider has been stoped, finishing to push new blocks.") 448 return 449 } 450 } 451 } 452 453 func (s *GossipStateProviderImpl) antiEntropy() { 454 defer s.done.Done() 455 defer logger.Debug("State Provider stopped, stopping anti entropy procedure.") 456 457 for { 458 select { 459 case <-s.stopCh: 460 s.stopCh <- struct{}{} 461 return 462 case <-time.After(defAntiEntropyInterval): 463 current, err := s.committer.LedgerHeight() 464 if err != nil { 465 // Unable to read from ledger continue to the next round 466 logger.Error("Cannot obtain ledger height, due to", err) 467 continue 468 } 469 if current == 0 { 470 logger.Error("Ledger reported block height of 0 but this should be impossible") 471 continue 472 } 473 max := s.maxAvailableLedgerHeight() 474 475 if current-1 >= max { 476 continue 477 } 478 479 s.requestBlocksInRange(uint64(current), uint64(max)) 480 } 481 } 482 } 483 484 // Iterate over all available peers and check advertised meta state to 485 // find maximum available ledger height across peers 486 func (s *GossipStateProviderImpl) maxAvailableLedgerHeight() uint64 { 487 max := uint64(0) 488 for _, p := range s.gossip.PeersOfChannel(common2.ChainID(s.chainID)) { 489 if nodeMetastate, err := FromBytes(p.Metadata); err == nil { 490 if max < nodeMetastate.LedgerHeight { 491 max = nodeMetastate.LedgerHeight 492 } 493 } 494 } 495 return max 496 } 497 498 // GetBlocksInRange capable to acquire blocks with sequence 499 // numbers in the range [start...end]. 500 func (s *GossipStateProviderImpl) requestBlocksInRange(start uint64, end uint64) { 501 atomic.StoreInt32(&s.stateTransferActive, 1) 502 defer atomic.StoreInt32(&s.stateTransferActive, 0) 503 504 for prev := start; prev <= end; { 505 next := min(end, prev+defAntiEntropyBatchSize) 506 507 gossipMsg := s.stateRequestMessage(prev, next) 508 509 responseReceived := false 510 tryCounts := 0 511 512 for !responseReceived { 513 if tryCounts > defAntiEntropyMaxRetries { 514 logger.Warningf("Wasn't able to get blocks in range [%d...%d], after %d retries", 515 prev, next, tryCounts) 516 return 517 } 518 // Select peers to ask for blocks 519 peer, err := s.selectPeerToRequestFrom(next) 520 if err != nil { 521 logger.Warningf("Cannot send state request for blocks in range [%d...%d], due to", 522 prev, next, err) 523 return 524 } 525 526 logger.Debugf("State transfer, with peer %s, requesting blocks in range [%d...%d], "+ 527 "for chainID %s", peer.Endpoint, prev, next, s.chainID) 528 529 s.gossip.Send(gossipMsg, peer) 530 tryCounts++ 531 532 // Wait until timeout or response arrival 533 select { 534 case msg := <-s.stateResponseCh: 535 if msg.GetGossipMessage().Nonce != gossipMsg.Nonce { 536 continue 537 } 538 // Got corresponding response for state request, can continue 539 index, err := s.handleStateResponse(msg) 540 if err != nil { 541 logger.Warningf("Wasn't able to process state response for "+ 542 "blocks [%d...%d], due to %s", prev, next, err) 543 continue 544 } 545 prev = index + 1 546 responseReceived = true 547 case <-time.After(defAntiEntropyStateResponseTimeout): 548 case <-s.stopCh: 549 s.stopCh <- struct{}{} 550 return 551 } 552 } 553 } 554 } 555 556 // Generate state request message for given blocks in range [beginSeq...endSeq] 557 func (s *GossipStateProviderImpl) stateRequestMessage(beginSeq uint64, endSeq uint64) *proto.GossipMessage { 558 return &proto.GossipMessage{ 559 Nonce: util.RandomUInt64(), 560 Tag: proto.GossipMessage_CHAN_OR_ORG, 561 Channel: []byte(s.chainID), 562 Content: &proto.GossipMessage_StateRequest{ 563 StateRequest: &proto.RemoteStateRequest{ 564 StartSeqNum: beginSeq, 565 EndSeqNum: endSeq, 566 }, 567 }, 568 } 569 } 570 571 // Select peer which has required blocks to ask missing blocks from 572 func (s *GossipStateProviderImpl) selectPeerToRequestFrom(height uint64) (*comm.RemotePeer, error) { 573 // Filter peers which posses required range of missing blocks 574 peers := s.filterPeers(s.hasRequiredHeight(height)) 575 576 n := len(peers) 577 if n == 0 { 578 return nil, errors.New("there are no peers to ask for missing blocks from") 579 } 580 581 // Select peers to ask for blocks 582 return peers[util.RandomInt(n)], nil 583 } 584 585 // filterPeers return list of peers which aligns the predicate provided 586 func (s *GossipStateProviderImpl) filterPeers(predicate func(peer discovery.NetworkMember) bool) []*comm.RemotePeer { 587 var peers []*comm.RemotePeer 588 589 for _, member := range s.gossip.PeersOfChannel(common2.ChainID(s.chainID)) { 590 if predicate(member) { 591 peers = append(peers, &comm.RemotePeer{Endpoint: member.PreferredEndpoint(), PKIID: member.PKIid}) 592 } 593 } 594 595 return peers 596 } 597 598 // hasRequiredHeight returns predicate which is capable to filter peers with ledger height above than indicated 599 // by provided input parameter 600 func (s *GossipStateProviderImpl) hasRequiredHeight(height uint64) func(peer discovery.NetworkMember) bool { 601 return func(peer discovery.NetworkMember) bool { 602 if nodeMetadata, err := FromBytes(peer.Metadata); err != nil { 603 logger.Errorf("Unable to de-serialize node meta state, error = %s", err) 604 } else if nodeMetadata.LedgerHeight >= height { 605 return true 606 } 607 608 return false 609 } 610 } 611 612 // GetBlock return ledger block given its sequence number as a parameter 613 func (s *GossipStateProviderImpl) GetBlock(index uint64) *common.Block { 614 // Try to read missing block from the ledger, should return no nil with 615 // content including at least one block 616 if blocks := s.committer.GetBlocks([]uint64{index}); blocks != nil && len(blocks) > 0 { 617 return blocks[0] 618 } 619 620 return nil 621 } 622 623 // AddPayload add new payload into state 624 func (s *GossipStateProviderImpl) AddPayload(payload *proto.Payload) error { 625 if payload == nil { 626 return errors.New("Given payload is nil") 627 } 628 logger.Debug("Adding new payload into the buffer, seqNum = ", payload.SeqNum) 629 height, err := s.committer.LedgerHeight() 630 if err != nil { 631 return fmt.Errorf("Failed obtaining ledger height: %v", err) 632 } 633 634 if payload.SeqNum-height >= defMaxBlockDistance { 635 return fmt.Errorf("Ledger height is at %d, cannot enqueue block with sequence of %d", height, payload.SeqNum) 636 } 637 638 return s.payloads.Push(payload) 639 } 640 641 func (s *GossipStateProviderImpl) commitBlock(block *common.Block) error { 642 if err := s.committer.Commit(block); err != nil { 643 logger.Errorf("Got error while committing(%s)", err) 644 return err 645 } 646 647 // Update ledger level within node metadata 648 nodeMetastate := NewNodeMetastate(block.Header.Number) 649 // Decode nodeMetastate to byte array 650 b, err := nodeMetastate.Bytes() 651 if err == nil { 652 s.gossip.UpdateChannelMetadata(b, common2.ChainID(s.chainID)) 653 } else { 654 655 logger.Errorf("Unable to serialize node meta nodeMetastate, error = %s", err) 656 } 657 658 logger.Debugf("Channel [%s]: Created block [%d] with %d transaction(s)", 659 s.chainID, block.Header.Number, len(block.Data.Data)) 660 661 return nil 662 } 663 664 func min(a uint64, b uint64) uint64 { 665 return b ^ ((a ^ b) & (-(uint64(a-b) >> 63))) 666 }