github.com/defanghe/fabric@v2.1.1+incompatible/gossip/comm/comm_impl.go (about) 1 /* 2 Copyright IBM Corp. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package comm 8 9 import ( 10 "bytes" 11 "context" 12 "crypto/tls" 13 "encoding/hex" 14 "fmt" 15 "reflect" 16 "sync" 17 "sync/atomic" 18 "time" 19 20 proto "github.com/hyperledger/fabric-protos-go/gossip" 21 "github.com/hyperledger/fabric/gossip/api" 22 "github.com/hyperledger/fabric/gossip/common" 23 "github.com/hyperledger/fabric/gossip/identity" 24 "github.com/hyperledger/fabric/gossip/metrics" 25 "github.com/hyperledger/fabric/gossip/protoext" 26 "github.com/hyperledger/fabric/gossip/util" 27 "github.com/pkg/errors" 28 "google.golang.org/grpc" 29 "google.golang.org/grpc/peer" 30 ) 31 32 const ( 33 handshakeTimeout = time.Second * 10 34 DefDialTimeout = time.Second * 3 35 DefConnTimeout = time.Second * 2 36 DefRecvBuffSize = 20 37 DefSendBuffSize = 20 38 ) 39 40 var ( 41 errProbe = errors.New("probe") 42 ) 43 44 // SecurityAdvisor defines an external auxiliary object 45 // that provides security and identity related capabilities 46 type SecurityAdvisor interface { 47 // OrgByPeerIdentity returns the organization identity of the given PeerIdentityType 48 OrgByPeerIdentity(api.PeerIdentityType) api.OrgIdentityType 49 } 50 51 func (c *commImpl) SetDialOpts(opts ...grpc.DialOption) { 52 if len(opts) == 0 { 53 c.logger.Warning("Given an empty set of grpc.DialOption, aborting") 54 return 55 } 56 c.opts = opts 57 } 58 59 // NewCommInstance creates a new comm instance that binds itself to the given gRPC server 60 func NewCommInstance(s *grpc.Server, certs *common.TLSCertificates, idStore identity.Mapper, 61 peerIdentity api.PeerIdentityType, secureDialOpts api.PeerSecureDialOpts, sa api.SecurityAdvisor, 62 commMetrics *metrics.CommMetrics, config CommConfig, dialOpts ...grpc.DialOption) (Comm, error) { 63 64 commInst := &commImpl{ 65 sa: sa, 66 pubSub: util.NewPubSub(), 67 PKIID: idStore.GetPKIidOfCert(peerIdentity), 68 idMapper: idStore, 69 logger: util.GetLogger(util.CommLogger, ""), 70 peerIdentity: peerIdentity, 71 opts: dialOpts, 72 secureDialOpts: secureDialOpts, 73 msgPublisher: NewChannelDemultiplexer(), 74 lock: &sync.Mutex{}, 75 deadEndpoints: make(chan common.PKIidType, 100), 76 identityChanges: make(chan common.PKIidType, 1), 77 stopping: int32(0), 78 exitChan: make(chan struct{}), 79 subscriptions: make([]chan protoext.ReceivedMessage, 0), 80 tlsCerts: certs, 81 metrics: commMetrics, 82 dialTimeout: config.DialTimeout, 83 connTimeout: config.ConnTimeout, 84 recvBuffSize: config.RecvBuffSize, 85 sendBuffSize: config.SendBuffSize, 86 } 87 88 connConfig := ConnConfig{ 89 RecvBuffSize: config.RecvBuffSize, 90 SendBuffSize: config.SendBuffSize, 91 } 92 93 commInst.connStore = newConnStore(commInst, commInst.logger, connConfig) 94 95 proto.RegisterGossipServer(s, commInst) 96 97 return commInst, nil 98 } 99 100 // CommConfig is the configuration required to initialize a new comm 101 type CommConfig struct { 102 DialTimeout time.Duration // Dial timeout 103 ConnTimeout time.Duration // Connection timeout 104 RecvBuffSize int // Buffer size of received messages 105 SendBuffSize int // Buffer size of sending messages 106 } 107 108 type commImpl struct { 109 sa api.SecurityAdvisor 110 tlsCerts *common.TLSCertificates 111 pubSub *util.PubSub 112 peerIdentity api.PeerIdentityType 113 idMapper identity.Mapper 114 logger util.Logger 115 opts []grpc.DialOption 116 secureDialOpts func() []grpc.DialOption 117 connStore *connectionStore 118 PKIID []byte 119 deadEndpoints chan common.PKIidType 120 identityChanges chan common.PKIidType 121 msgPublisher *ChannelDeMultiplexer 122 lock *sync.Mutex 123 exitChan chan struct{} 124 stopWG sync.WaitGroup 125 subscriptions []chan protoext.ReceivedMessage 126 stopping int32 127 metrics *metrics.CommMetrics 128 dialTimeout time.Duration 129 connTimeout time.Duration 130 recvBuffSize int 131 sendBuffSize int 132 } 133 134 func (c *commImpl) createConnection(endpoint string, expectedPKIID common.PKIidType) (*connection, error) { 135 var err error 136 var cc *grpc.ClientConn 137 var stream proto.Gossip_GossipStreamClient 138 var pkiID common.PKIidType 139 var connInfo *protoext.ConnectionInfo 140 var dialOpts []grpc.DialOption 141 142 c.logger.Debug("Entering", endpoint, expectedPKIID) 143 defer c.logger.Debug("Exiting") 144 145 if c.isStopping() { 146 return nil, errors.New("Stopping") 147 } 148 dialOpts = append(dialOpts, c.secureDialOpts()...) 149 dialOpts = append(dialOpts, grpc.WithBlock()) 150 dialOpts = append(dialOpts, c.opts...) 151 ctx := context.Background() 152 ctx, cancel := context.WithTimeout(ctx, c.dialTimeout) 153 defer cancel() 154 cc, err = grpc.DialContext(ctx, endpoint, dialOpts...) 155 if err != nil { 156 return nil, errors.WithStack(err) 157 } 158 159 cl := proto.NewGossipClient(cc) 160 161 ctx, cancel = context.WithTimeout(context.Background(), c.connTimeout) 162 defer cancel() 163 if _, err = cl.Ping(ctx, &proto.Empty{}); err != nil { 164 cc.Close() 165 return nil, errors.WithStack(err) 166 } 167 168 ctx, cancel = context.WithCancel(context.Background()) 169 if stream, err = cl.GossipStream(ctx); err == nil { 170 connInfo, err = c.authenticateRemotePeer(stream, true, false) 171 if err == nil { 172 pkiID = connInfo.ID 173 // PKIID is nil when we don't know the remote PKI id's 174 if expectedPKIID != nil && !bytes.Equal(pkiID, expectedPKIID) { 175 actualOrg := c.sa.OrgByPeerIdentity(connInfo.Identity) 176 // If the identity isn't present, it's nil - therefore OrgByPeerIdentity would 177 // return nil too and thus would be different than the actual organization 178 identity, _ := c.idMapper.Get(expectedPKIID) 179 oldOrg := c.sa.OrgByPeerIdentity(identity) 180 if !bytes.Equal(actualOrg, oldOrg) { 181 c.logger.Warning("Remote endpoint claims to be a different peer, expected", expectedPKIID, "but got", pkiID) 182 cc.Close() 183 cancel() 184 return nil, errors.New("authentication failure") 185 } else { 186 c.logger.Infof("Peer %s changed its PKI-ID from %s to %s", endpoint, expectedPKIID, pkiID) 187 c.identityChanges <- expectedPKIID 188 } 189 } 190 connConfig := ConnConfig{ 191 RecvBuffSize: c.recvBuffSize, 192 SendBuffSize: c.sendBuffSize, 193 } 194 conn := newConnection(cl, cc, stream, c.metrics, connConfig) 195 conn.pkiID = pkiID 196 conn.info = connInfo 197 conn.logger = c.logger 198 conn.cancel = cancel 199 200 h := func(m *protoext.SignedGossipMessage) { 201 c.logger.Debug("Got message:", m) 202 c.msgPublisher.DeMultiplex(&ReceivedMessageImpl{ 203 conn: conn, 204 SignedGossipMessage: m, 205 connInfo: connInfo, 206 }) 207 } 208 conn.handler = interceptAcks(h, connInfo.ID, c.pubSub) 209 return conn, nil 210 } 211 c.logger.Warningf("Authentication failed: %+v", err) 212 } 213 cc.Close() 214 cancel() 215 return nil, errors.WithStack(err) 216 } 217 218 func (c *commImpl) Send(msg *protoext.SignedGossipMessage, peers ...*RemotePeer) { 219 if c.isStopping() || len(peers) == 0 { 220 return 221 } 222 c.logger.Debug("Entering, sending", msg, "to ", len(peers), "peers") 223 224 for _, peer := range peers { 225 go func(peer *RemotePeer, msg *protoext.SignedGossipMessage) { 226 c.sendToEndpoint(peer, msg, nonBlockingSend) 227 }(peer, msg) 228 } 229 } 230 231 func (c *commImpl) sendToEndpoint(peer *RemotePeer, msg *protoext.SignedGossipMessage, shouldBlock blockingBehavior) { 232 if c.isStopping() { 233 return 234 } 235 c.logger.Debug("Entering, Sending to", peer.Endpoint, ", msg:", msg) 236 defer c.logger.Debug("Exiting") 237 var err error 238 239 conn, err := c.connStore.getConnection(peer) 240 if err == nil { 241 disConnectOnErr := func(err error) { 242 c.logger.Warningf("%v isn't responsive: %v", peer, err) 243 c.disconnect(peer.PKIID) 244 conn.close() 245 } 246 conn.send(msg, disConnectOnErr, shouldBlock) 247 return 248 } 249 c.logger.Warningf("Failed obtaining connection for %v reason: %v", peer, err) 250 c.disconnect(peer.PKIID) 251 } 252 253 func (c *commImpl) isStopping() bool { 254 return atomic.LoadInt32(&c.stopping) == int32(1) 255 } 256 257 func (c *commImpl) Probe(remotePeer *RemotePeer) error { 258 var dialOpts []grpc.DialOption 259 endpoint := remotePeer.Endpoint 260 pkiID := remotePeer.PKIID 261 if c.isStopping() { 262 return fmt.Errorf("Stopping") 263 } 264 c.logger.Debug("Entering, endpoint:", endpoint, "PKIID:", pkiID) 265 dialOpts = append(dialOpts, c.secureDialOpts()...) 266 dialOpts = append(dialOpts, grpc.WithBlock()) 267 dialOpts = append(dialOpts, c.opts...) 268 ctx := context.Background() 269 ctx, cancel := context.WithTimeout(ctx, c.dialTimeout) 270 defer cancel() 271 cc, err := grpc.DialContext(ctx, remotePeer.Endpoint, dialOpts...) 272 if err != nil { 273 c.logger.Debugf("Returning %v", err) 274 return err 275 } 276 defer cc.Close() 277 cl := proto.NewGossipClient(cc) 278 ctx, cancel = context.WithTimeout(context.Background(), c.connTimeout) 279 defer cancel() 280 _, err = cl.Ping(ctx, &proto.Empty{}) 281 c.logger.Debugf("Returning %v", err) 282 return err 283 } 284 285 func (c *commImpl) Handshake(remotePeer *RemotePeer) (api.PeerIdentityType, error) { 286 var dialOpts []grpc.DialOption 287 dialOpts = append(dialOpts, c.secureDialOpts()...) 288 dialOpts = append(dialOpts, grpc.WithBlock()) 289 dialOpts = append(dialOpts, c.opts...) 290 ctx := context.Background() 291 ctx, cancel := context.WithTimeout(ctx, c.dialTimeout) 292 defer cancel() 293 cc, err := grpc.DialContext(ctx, remotePeer.Endpoint, dialOpts...) 294 if err != nil { 295 return nil, err 296 } 297 defer cc.Close() 298 299 cl := proto.NewGossipClient(cc) 300 ctx, cancel = context.WithTimeout(context.Background(), c.connTimeout) 301 defer cancel() 302 if _, err = cl.Ping(ctx, &proto.Empty{}); err != nil { 303 return nil, err 304 } 305 306 ctx, cancel = context.WithTimeout(context.Background(), handshakeTimeout) 307 defer cancel() 308 stream, err := cl.GossipStream(ctx) 309 if err != nil { 310 return nil, err 311 } 312 connInfo, err := c.authenticateRemotePeer(stream, true, true) 313 if err != nil { 314 c.logger.Warningf("Authentication failed: %v", err) 315 return nil, err 316 } 317 if len(remotePeer.PKIID) > 0 && !bytes.Equal(connInfo.ID, remotePeer.PKIID) { 318 return nil, fmt.Errorf("PKI-ID of remote peer doesn't match expected PKI-ID") 319 } 320 return connInfo.Identity, nil 321 } 322 323 func (c *commImpl) Accept(acceptor common.MessageAcceptor) <-chan protoext.ReceivedMessage { 324 genericChan := c.msgPublisher.AddChannel(acceptor) 325 specificChan := make(chan protoext.ReceivedMessage, 10) 326 327 if c.isStopping() { 328 c.logger.Warning("Accept() called but comm module is stopping, returning empty channel") 329 return specificChan 330 } 331 332 c.lock.Lock() 333 c.subscriptions = append(c.subscriptions, specificChan) 334 c.lock.Unlock() 335 336 c.stopWG.Add(1) 337 go func() { 338 defer c.logger.Debug("Exiting Accept() loop") 339 340 defer c.stopWG.Done() 341 342 for { 343 select { 344 case msg, channelOpen := <-genericChan: 345 if !channelOpen { 346 return 347 } 348 select { 349 case specificChan <- msg.(*ReceivedMessageImpl): 350 case <-c.exitChan: 351 return 352 } 353 case <-c.exitChan: 354 return 355 } 356 } 357 }() 358 return specificChan 359 } 360 361 func (c *commImpl) PresumedDead() <-chan common.PKIidType { 362 return c.deadEndpoints 363 } 364 365 func (c *commImpl) IdentitySwitch() <-chan common.PKIidType { 366 return c.identityChanges 367 } 368 369 func (c *commImpl) CloseConn(peer *RemotePeer) { 370 c.logger.Debug("Closing connection for", peer) 371 c.connStore.closeConnByPKIid(peer.PKIID) 372 } 373 374 func (c *commImpl) closeSubscriptions() { 375 c.lock.Lock() 376 defer c.lock.Unlock() 377 for _, ch := range c.subscriptions { 378 close(ch) 379 } 380 } 381 382 func (c *commImpl) Stop() { 383 if !atomic.CompareAndSwapInt32(&c.stopping, 0, int32(1)) { 384 return 385 } 386 c.logger.Info("Stopping") 387 defer c.logger.Info("Stopped") 388 c.connStore.shutdown() 389 c.logger.Debug("Shut down connection store, connection count:", c.connStore.connNum()) 390 c.msgPublisher.Close() 391 close(c.exitChan) 392 c.stopWG.Wait() 393 c.closeSubscriptions() 394 } 395 396 func (c *commImpl) GetPKIid() common.PKIidType { 397 return c.PKIID 398 } 399 400 func extractRemoteAddress(stream stream) string { 401 var remoteAddress string 402 p, ok := peer.FromContext(stream.Context()) 403 if ok { 404 if address := p.Addr; address != nil { 405 remoteAddress = address.String() 406 } 407 } 408 return remoteAddress 409 } 410 411 func (c *commImpl) authenticateRemotePeer(stream stream, initiator, isProbe bool) (*protoext.ConnectionInfo, error) { 412 ctx := stream.Context() 413 remoteAddress := extractRemoteAddress(stream) 414 remoteCertHash := extractCertificateHashFromContext(ctx) 415 var err error 416 var cMsg *protoext.SignedGossipMessage 417 useTLS := c.tlsCerts != nil 418 var selfCertHash []byte 419 420 if useTLS { 421 certReference := c.tlsCerts.TLSServerCert 422 if initiator { 423 certReference = c.tlsCerts.TLSClientCert 424 } 425 selfCertHash = certHashFromRawCert(certReference.Load().(*tls.Certificate).Certificate[0]) 426 } 427 428 signer := func(msg []byte) ([]byte, error) { 429 return c.idMapper.Sign(msg) 430 } 431 432 // TLS enabled but not detected on other side 433 if useTLS && len(remoteCertHash) == 0 { 434 c.logger.Warningf("%s didn't send TLS certificate", remoteAddress) 435 return nil, fmt.Errorf("No TLS certificate") 436 } 437 438 cMsg, err = c.createConnectionMsg(c.PKIID, selfCertHash, c.peerIdentity, signer, isProbe) 439 if err != nil { 440 return nil, err 441 } 442 443 c.logger.Debug("Sending", cMsg, "to", remoteAddress) 444 stream.Send(cMsg.Envelope) 445 m, err := readWithTimeout(stream, c.connTimeout, remoteAddress) 446 if err != nil { 447 c.logger.Warningf("Failed reading messge from %s, reason: %v", remoteAddress, err) 448 return nil, err 449 } 450 receivedMsg := m.GetConn() 451 if receivedMsg == nil { 452 c.logger.Warning("Expected connection message from", remoteAddress, "but got", receivedMsg) 453 return nil, fmt.Errorf("Wrong type") 454 } 455 456 if receivedMsg.PkiId == nil { 457 c.logger.Warningf("%s didn't send a pkiID", remoteAddress) 458 return nil, fmt.Errorf("No PKI-ID") 459 } 460 461 c.logger.Debug("Received", receivedMsg, "from", remoteAddress) 462 err = c.idMapper.Put(receivedMsg.PkiId, receivedMsg.Identity) 463 if err != nil { 464 c.logger.Warningf("Identity store rejected %s : %v", remoteAddress, err) 465 return nil, err 466 } 467 468 connInfo := &protoext.ConnectionInfo{ 469 ID: receivedMsg.PkiId, 470 Identity: receivedMsg.Identity, 471 Endpoint: remoteAddress, 472 Auth: &protoext.AuthInfo{ 473 Signature: m.Signature, 474 SignedData: m.Payload, 475 }, 476 } 477 478 // if TLS is enabled and detected, verify remote peer 479 if useTLS { 480 // If the remote peer sent its TLS certificate, make sure it actually matches the TLS cert 481 // that the peer used. 482 if !bytes.Equal(remoteCertHash, receivedMsg.TlsCertHash) { 483 return nil, errors.Errorf("Expected %v in remote hash of TLS cert, but got %v", remoteCertHash, receivedMsg.TlsCertHash) 484 } 485 } 486 // Final step - verify the signature on the connection message itself 487 verifier := func(peerIdentity []byte, signature, message []byte) error { 488 pkiID := c.idMapper.GetPKIidOfCert(api.PeerIdentityType(peerIdentity)) 489 return c.idMapper.Verify(pkiID, signature, message) 490 } 491 err = m.Verify(receivedMsg.Identity, verifier) 492 if err != nil { 493 c.logger.Errorf("Failed verifying signature from %s : %v", remoteAddress, err) 494 return nil, err 495 } 496 497 c.logger.Debug("Authenticated", remoteAddress) 498 499 if receivedMsg.Probe { 500 return connInfo, errProbe 501 } 502 503 return connInfo, nil 504 } 505 506 // SendWithAck sends a message to remote peers, waiting for acknowledgement from minAck of them, or until a certain timeout expires 507 func (c *commImpl) SendWithAck(msg *protoext.SignedGossipMessage, timeout time.Duration, minAck int, peers ...*RemotePeer) AggregatedSendResult { 508 if len(peers) == 0 { 509 return nil 510 } 511 var err error 512 513 // Roll a random NONCE to be used as a send ID to differentiate 514 // between different invocations 515 msg.Nonce = util.RandomUInt64() 516 // Replace the envelope in the message to update the NONCE 517 msg, err = protoext.NoopSign(msg.GossipMessage) 518 519 if c.isStopping() || err != nil { 520 if err == nil { 521 err = errors.New("comm is stopping") 522 } 523 results := []SendResult{} 524 for _, p := range peers { 525 results = append(results, SendResult{ 526 error: err, 527 RemotePeer: *p, 528 }) 529 } 530 return results 531 } 532 c.logger.Debug("Entering, sending", msg, "to ", len(peers), "peers") 533 sndFunc := func(peer *RemotePeer, msg *protoext.SignedGossipMessage) { 534 c.sendToEndpoint(peer, msg, blockingSend) 535 } 536 // Subscribe to acks 537 subscriptions := make(map[string]func() error) 538 for _, p := range peers { 539 topic := topicForAck(msg.Nonce, p.PKIID) 540 sub := c.pubSub.Subscribe(topic, timeout) 541 subscriptions[string(p.PKIID)] = func() error { 542 msg, err := sub.Listen() 543 if err != nil { 544 return err 545 } 546 if msg, isAck := msg.(*proto.Acknowledgement); !isAck { 547 return fmt.Errorf("Received a message of type %s, expected *proto.Acknowledgement", reflect.TypeOf(msg)) 548 } else { 549 if msg.Error != "" { 550 return errors.New(msg.Error) 551 } 552 } 553 return nil 554 } 555 } 556 waitForAck := func(p *RemotePeer) error { 557 return subscriptions[string(p.PKIID)]() 558 } 559 ackOperation := newAckSendOperation(sndFunc, waitForAck) 560 return ackOperation.send(msg, minAck, peers...) 561 } 562 563 func (c *commImpl) GossipStream(stream proto.Gossip_GossipStreamServer) error { 564 if c.isStopping() { 565 return fmt.Errorf("Shutting down") 566 } 567 connInfo, err := c.authenticateRemotePeer(stream, false, false) 568 569 if err == errProbe { 570 c.logger.Infof("Peer %s (%s) probed us", connInfo.ID, connInfo.Endpoint) 571 return nil 572 } 573 574 if err != nil { 575 c.logger.Errorf("Authentication failed: %v", err) 576 return err 577 } 578 c.logger.Debug("Servicing", extractRemoteAddress(stream)) 579 580 conn := c.connStore.onConnected(stream, connInfo, c.metrics) 581 582 h := func(m *protoext.SignedGossipMessage) { 583 c.msgPublisher.DeMultiplex(&ReceivedMessageImpl{ 584 conn: conn, 585 SignedGossipMessage: m, 586 connInfo: connInfo, 587 }) 588 } 589 590 conn.handler = interceptAcks(h, connInfo.ID, c.pubSub) 591 592 defer func() { 593 c.logger.Debug("Client", extractRemoteAddress(stream), " disconnected") 594 c.connStore.closeConnByPKIid(connInfo.ID) 595 }() 596 597 return conn.serviceConnection() 598 } 599 600 func (c *commImpl) Ping(context.Context, *proto.Empty) (*proto.Empty, error) { 601 return &proto.Empty{}, nil 602 } 603 604 func (c *commImpl) disconnect(pkiID common.PKIidType) { 605 if c.isStopping() { 606 return 607 } 608 c.deadEndpoints <- pkiID 609 c.connStore.closeConnByPKIid(pkiID) 610 } 611 612 func readWithTimeout(stream stream, timeout time.Duration, address string) (*protoext.SignedGossipMessage, error) { 613 incChan := make(chan *protoext.SignedGossipMessage, 1) 614 errChan := make(chan error, 1) 615 go func() { 616 if m, err := stream.Recv(); err == nil { 617 msg, err := protoext.EnvelopeToGossipMessage(m) 618 if err != nil { 619 errChan <- err 620 return 621 } 622 incChan <- msg 623 } 624 }() 625 select { 626 case <-time.After(timeout): 627 return nil, errors.Errorf("timed out waiting for connection message from %s", address) 628 case m := <-incChan: 629 return m, nil 630 case err := <-errChan: 631 return nil, errors.WithStack(err) 632 } 633 } 634 635 func (c *commImpl) createConnectionMsg(pkiID common.PKIidType, certHash []byte, cert api.PeerIdentityType, signer protoext.Signer, isProbe bool) (*protoext.SignedGossipMessage, error) { 636 m := &proto.GossipMessage{ 637 Tag: proto.GossipMessage_EMPTY, 638 Nonce: 0, 639 Content: &proto.GossipMessage_Conn{ 640 Conn: &proto.ConnEstablish{ 641 TlsCertHash: certHash, 642 Identity: cert, 643 PkiId: pkiID, 644 Probe: isProbe, 645 }, 646 }, 647 } 648 sMsg := &protoext.SignedGossipMessage{ 649 GossipMessage: m, 650 } 651 _, err := sMsg.Sign(signer) 652 return sMsg, errors.WithStack(err) 653 } 654 655 type stream interface { 656 Send(envelope *proto.Envelope) error 657 Recv() (*proto.Envelope, error) 658 Context() context.Context 659 } 660 661 func topicForAck(nonce uint64, pkiID common.PKIidType) string { 662 return fmt.Sprintf("%d %s", nonce, hex.EncodeToString(pkiID)) 663 }