github.com/myafeier/fabric@v1.0.1-0.20170722181825-3a4b1f2bce86/gossip/discovery/discovery_impl.go (about) 1 /* 2 Copyright IBM Corp. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package discovery 8 9 import ( 10 "bytes" 11 "errors" 12 "fmt" 13 "math" 14 "strconv" 15 "strings" 16 "sync" 17 "sync/atomic" 18 "time" 19 20 "github.com/hyperledger/fabric/gossip/common" 21 "github.com/hyperledger/fabric/gossip/gossip/msgstore" 22 "github.com/hyperledger/fabric/gossip/util" 23 proto "github.com/hyperledger/fabric/protos/gossip" 24 "github.com/op/go-logging" 25 ) 26 27 const defaultHelloInterval = time.Duration(5) * time.Second 28 const msgExpirationFactor = 20 29 30 var aliveExpirationCheckInterval time.Duration 31 var maxConnectionAttempts = 120 32 33 // SetAliveTimeInterval sets the alive time interval 34 func SetAliveTimeInterval(interval time.Duration) { 35 util.SetDuration("peer.gossip.aliveTimeInterval", interval) 36 } 37 38 // SetAliveExpirationTimeout sets the expiration timeout 39 func SetAliveExpirationTimeout(timeout time.Duration) { 40 util.SetDuration("peer.gossip.aliveExpirationTimeout", timeout) 41 aliveExpirationCheckInterval = time.Duration(timeout / 10) 42 } 43 44 // SetAliveExpirationCheckInterval sets the expiration check interval 45 func SetAliveExpirationCheckInterval(interval time.Duration) { 46 aliveExpirationCheckInterval = interval 47 } 48 49 // SetReconnectInterval sets the reconnect interval 50 func SetReconnectInterval(interval time.Duration) { 51 util.SetDuration("peer.gossip.reconnectInterval", interval) 52 } 53 54 // SetMaxConnAttempts sets the maximum number of connection 55 // attempts the peer would perform when invoking Connect() 56 func SetMaxConnAttempts(attempts int) { 57 maxConnectionAttempts = attempts 58 } 59 60 type timestamp struct { 61 incTime time.Time 62 seqNum uint64 63 lastSeen time.Time 64 } 65 66 func (ts *timestamp) String() string { 67 return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum) 68 } 69 70 type gossipDiscoveryImpl struct { 71 incTime uint64 72 seqNum uint64 73 self NetworkMember 74 deadLastTS map[string]*timestamp // H 75 aliveLastTS map[string]*timestamp // V 76 id2Member map[string]*NetworkMember // all known members 77 aliveMembership *util.MembershipStore 78 deadMembership *util.MembershipStore 79 80 msgStore *aliveMsgStore 81 82 comm CommService 83 crypt CryptoService 84 lock *sync.RWMutex 85 86 toDieChan chan struct{} 87 toDieFlag int32 88 port int 89 logger *logging.Logger 90 disclosurePolicy DisclosurePolicy 91 pubsub *util.PubSub 92 } 93 94 // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed 95 func NewDiscoveryService(self NetworkMember, comm CommService, crypt CryptoService, disPol DisclosurePolicy) Discovery { 96 d := &gossipDiscoveryImpl{ 97 self: self, 98 incTime: uint64(time.Now().UnixNano()), 99 seqNum: uint64(0), 100 deadLastTS: make(map[string]*timestamp), 101 aliveLastTS: make(map[string]*timestamp), 102 id2Member: make(map[string]*NetworkMember), 103 aliveMembership: util.NewMembershipStore(), 104 deadMembership: util.NewMembershipStore(), 105 crypt: crypt, 106 comm: comm, 107 lock: &sync.RWMutex{}, 108 toDieChan: make(chan struct{}, 1), 109 toDieFlag: int32(0), 110 logger: util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint), 111 disclosurePolicy: disPol, 112 pubsub: util.NewPubSub(), 113 } 114 115 d.validateSelfConfig() 116 d.msgStore = newAliveMsgStore(d) 117 118 go d.periodicalSendAlive() 119 go d.periodicalCheckAlive() 120 go d.handleMessages() 121 go d.periodicalReconnectToDead() 122 go d.handlePresumedDeadPeers() 123 124 d.logger.Info("Started", self, "incTime is", d.incTime) 125 126 return d 127 } 128 129 // Lookup returns a network member, or nil if not found 130 func (d *gossipDiscoveryImpl) Lookup(PKIID common.PKIidType) *NetworkMember { 131 if bytes.Equal(PKIID, d.self.PKIid) { 132 return &d.self 133 } 134 d.lock.RLock() 135 defer d.lock.RUnlock() 136 nm := d.id2Member[string(PKIID)] 137 return nm 138 } 139 140 func (d *gossipDiscoveryImpl) Connect(member NetworkMember, id identifier) { 141 for _, endpoint := range []string{member.InternalEndpoint, member.Endpoint} { 142 if d.isMyOwnEndpoint(endpoint) { 143 d.logger.Debug("Skipping connecting to myself") 144 return 145 } 146 } 147 148 d.logger.Debug("Entering", member) 149 defer d.logger.Debug("Exiting") 150 go func() { 151 for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ { 152 id, err := id() 153 if err != nil { 154 if d.toDie() { 155 return 156 } 157 d.logger.Warning("Could not connect to", member, ":", err) 158 time.Sleep(getReconnectInterval()) 159 continue 160 } 161 peer := &NetworkMember{ 162 InternalEndpoint: member.InternalEndpoint, 163 Endpoint: member.Endpoint, 164 PKIid: id.ID, 165 } 166 m, err := d.createMembershipRequest(id.SelfOrg) 167 if err != nil { 168 d.logger.Warning("Failed creating membership request:", err) 169 continue 170 } 171 req, err := m.NoopSign() 172 if err != nil { 173 d.logger.Warning("Failed creating SignedGossipMessage:", err) 174 continue 175 } 176 req.Nonce = util.RandomUInt64() 177 req, err = req.NoopSign() 178 if err != nil { 179 d.logger.Warning("Failed adding NONCE to SignedGossipMessage", err) 180 continue 181 } 182 go d.sendUntilAcked(peer, req) 183 return 184 } 185 186 }() 187 } 188 189 func (d *gossipDiscoveryImpl) isMyOwnEndpoint(endpoint string) bool { 190 return endpoint == fmt.Sprintf("127.0.0.1:%d", d.port) || endpoint == fmt.Sprintf("localhost:%d", d.port) || 191 endpoint == d.self.InternalEndpoint || endpoint == d.self.Endpoint 192 } 193 194 func (d *gossipDiscoveryImpl) validateSelfConfig() { 195 endpoint := d.self.InternalEndpoint 196 if len(endpoint) == 0 { 197 d.logger.Panic("Internal endpoint is empty:", endpoint) 198 } 199 200 internalEndpointSplit := strings.Split(endpoint, ":") 201 if len(internalEndpointSplit) != 2 { 202 d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", endpoint) 203 } 204 myPort, err := strconv.ParseInt(internalEndpointSplit[1], 10, 64) 205 if err != nil { 206 d.logger.Panicf("Self endpoint %s has not valid port'", endpoint) 207 } 208 209 if myPort > int64(math.MaxUint16) { 210 d.logger.Panicf("Self endpoint %s's port takes more than 16 bits", endpoint) 211 } 212 213 d.port = int(myPort) 214 } 215 216 func (d *gossipDiscoveryImpl) sendUntilAcked(peer *NetworkMember, message *proto.SignedGossipMessage) { 217 nonce := message.Nonce 218 for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ { 219 sub := d.pubsub.Subscribe(fmt.Sprintf("%d", nonce), time.Second*5) 220 d.comm.SendToPeer(peer, message) 221 if _, timeoutErr := sub.Listen(); timeoutErr == nil { 222 return 223 } 224 time.Sleep(getReconnectInterval()) 225 } 226 } 227 228 func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) { 229 if d.toDie() { 230 return 231 } 232 var peers2SendTo []*NetworkMember 233 m, err := d.createMembershipRequest(true) 234 if err != nil { 235 d.logger.Warning("Failed creating membership request:", err) 236 return 237 } 238 memReq, err := m.NoopSign() 239 if err != nil { 240 d.logger.Warning("Failed creating SignedGossipMessage:", err) 241 return 242 } 243 d.lock.RLock() 244 245 n := d.aliveMembership.Size() 246 k := peerNum 247 if k > n { 248 k = n 249 } 250 251 aliveMembersAsSlice := d.aliveMembership.ToSlice() 252 for _, i := range util.GetRandomIndices(k, n-1) { 253 pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership 254 var internalEndpoint string 255 if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil { 256 internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint() 257 } 258 netMember := &NetworkMember{ 259 Endpoint: pulledPeer.Endpoint, 260 Metadata: pulledPeer.Metadata, 261 PKIid: pulledPeer.PkiId, 262 InternalEndpoint: internalEndpoint, 263 } 264 peers2SendTo = append(peers2SendTo, netMember) 265 } 266 267 d.lock.RUnlock() 268 269 for _, netMember := range peers2SendTo { 270 d.comm.SendToPeer(netMember, memReq) 271 } 272 } 273 274 func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() { 275 defer d.logger.Debug("Stopped") 276 277 for !d.toDie() { 278 select { 279 case deadPeer := <-d.comm.PresumedDead(): 280 if d.isAlive(deadPeer) { 281 d.expireDeadMembers([]common.PKIidType{deadPeer}) 282 } 283 case s := <-d.toDieChan: 284 d.toDieChan <- s 285 return 286 } 287 } 288 } 289 290 func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool { 291 d.lock.RLock() 292 defer d.lock.RUnlock() 293 _, alive := d.aliveLastTS[string(pkiID)] 294 return alive 295 } 296 297 func (d *gossipDiscoveryImpl) handleMessages() { 298 defer d.logger.Debug("Stopped") 299 300 in := d.comm.Accept() 301 for !d.toDie() { 302 select { 303 case s := <-d.toDieChan: 304 d.toDieChan <- s 305 return 306 case m := <-in: 307 d.handleMsgFromComm(m) 308 } 309 } 310 } 311 312 func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) { 313 if m == nil { 314 return 315 } 316 if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil { 317 d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.GossipMessage) 318 return 319 } 320 321 d.logger.Debug("Got message:", m) 322 defer d.logger.Debug("Exiting") 323 324 if memReq := m.GetMemReq(); memReq != nil { 325 selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage() 326 if err != nil { 327 d.logger.Warning("Failed deserializing GossipMessage from envelope:", err) 328 return 329 } 330 331 if d.msgStore.CheckValid(selfInfoGossipMsg) { 332 d.handleAliveMessage(selfInfoGossipMsg) 333 } 334 335 var internalEndpoint string 336 if m.Envelope.SecretEnvelope != nil { 337 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 338 } 339 340 // Sending a membership response to a peer may block this routine 341 // in case the sending is deliberately slow (i.e attack). 342 // will keep this async until I'll write a timeout detector in the comm layer 343 go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, internalEndpoint, m.Nonce) 344 return 345 } 346 347 if m.IsAliveMsg() { 348 349 if !d.msgStore.Add(m) { 350 return 351 } 352 d.handleAliveMessage(m) 353 354 d.comm.Gossip(m) 355 return 356 } 357 358 if memResp := m.GetMemRes(); memResp != nil { 359 d.pubsub.Publish(fmt.Sprintf("%d", m.Nonce), m.Nonce) 360 for _, env := range memResp.Alive { 361 am, err := env.ToGossipMessage() 362 if err != nil { 363 d.logger.Warning("Membership response contains an invalid message from an online peer:", err) 364 return 365 } 366 if !am.IsAliveMsg() { 367 d.logger.Warning("Expected alive message, got", am, "instead") 368 return 369 } 370 371 if d.msgStore.CheckValid(am) { 372 d.handleAliveMessage(am) 373 } 374 } 375 376 for _, env := range memResp.Dead { 377 dm, err := env.ToGossipMessage() 378 if err != nil { 379 d.logger.Warning("Membership response contains an invalid message from an offline peer", err) 380 return 381 } 382 if !d.crypt.ValidateAliveMsg(dm) { 383 d.logger.Debugf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership) 384 continue 385 } 386 387 if !d.msgStore.CheckValid(dm) { 388 //Newer alive message exist 389 return 390 } 391 392 newDeadMembers := []*proto.SignedGossipMessage{} 393 d.lock.RLock() 394 if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiId)]; !known { 395 newDeadMembers = append(newDeadMembers, dm) 396 } 397 d.lock.RUnlock() 398 d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers) 399 } 400 } 401 } 402 403 func (d *gossipDiscoveryImpl) sendMemResponse(targetMember *proto.Member, internalEndpoint string, nonce uint64) { 404 d.logger.Debug("Entering", targetMember) 405 406 targetPeer := &NetworkMember{ 407 Endpoint: targetMember.Endpoint, 408 Metadata: targetMember.Metadata, 409 PKIid: targetMember.PkiId, 410 InternalEndpoint: internalEndpoint, 411 } 412 413 aliveMsg, err := d.createAliveMessage(true) 414 if err != nil { 415 d.logger.Warning("Failed creating alive message:", err) 416 return 417 } 418 memResp := d.createMembershipResponse(aliveMsg, targetPeer) 419 if memResp == nil { 420 errMsg := `Got a membership request from a peer that shouldn't have sent one: %v, closing connection to the peer as a result.` 421 d.logger.Warningf(errMsg, targetMember) 422 d.comm.CloseConn(targetPeer) 423 return 424 } 425 426 defer d.logger.Debug("Exiting, replying with", memResp) 427 428 msg, err := (&proto.GossipMessage{ 429 Tag: proto.GossipMessage_EMPTY, 430 Nonce: nonce, 431 Content: &proto.GossipMessage_MemRes{ 432 MemRes: memResp, 433 }, 434 }).NoopSign() 435 if err != nil { 436 d.logger.Warning("Failed creating SignedGossipMessage:", err) 437 return 438 } 439 d.comm.SendToPeer(targetPeer, msg) 440 } 441 442 func (d *gossipDiscoveryImpl) createMembershipResponse(aliveMsg *proto.SignedGossipMessage, targetMember *NetworkMember) *proto.MembershipResponse { 443 shouldBeDisclosed, omitConcealedFields := d.disclosurePolicy(targetMember) 444 445 if !shouldBeDisclosed(aliveMsg) { 446 return nil 447 } 448 449 d.lock.RLock() 450 defer d.lock.RUnlock() 451 452 deadPeers := []*proto.Envelope{} 453 454 for _, dm := range d.deadMembership.ToSlice() { 455 456 if !shouldBeDisclosed(dm) { 457 continue 458 } 459 deadPeers = append(deadPeers, omitConcealedFields(dm)) 460 } 461 462 var aliveSnapshot []*proto.Envelope 463 for _, am := range d.aliveMembership.ToSlice() { 464 if !shouldBeDisclosed(am) { 465 continue 466 } 467 aliveSnapshot = append(aliveSnapshot, omitConcealedFields(am)) 468 } 469 470 return &proto.MembershipResponse{ 471 Alive: append(aliveSnapshot, omitConcealedFields(aliveMsg)), 472 Dead: deadPeers, 473 } 474 } 475 476 func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) { 477 d.logger.Debug("Entering", m) 478 defer d.logger.Debug("Exiting") 479 480 if !d.crypt.ValidateAliveMsg(m) { 481 d.logger.Debugf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg()) 482 return 483 } 484 485 pkiID := m.GetAliveMsg().Membership.PkiId 486 if equalPKIid(pkiID, d.self.PKIid) { 487 d.logger.Debug("Got alive message about ourselves,", m) 488 diffExternalEndpoint := d.self.Endpoint != m.GetAliveMsg().Membership.Endpoint 489 var diffInternalEndpoint bool 490 secretEnvelope := m.GetSecretEnvelope() 491 if secretEnvelope != nil && secretEnvelope.InternalEndpoint() != "" { 492 diffInternalEndpoint = secretEnvelope.InternalEndpoint() != d.self.InternalEndpoint 493 } 494 if diffInternalEndpoint || diffExternalEndpoint { 495 d.logger.Error("Bad configuration detected: Received AliveMessage from a peer with the same PKI-ID as myself:", m.GossipMessage) 496 } 497 498 return 499 } 500 501 ts := m.GetAliveMsg().Timestamp 502 503 d.lock.RLock() 504 _, known := d.id2Member[string(pkiID)] 505 d.lock.RUnlock() 506 507 if !known { 508 d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{}) 509 return 510 } 511 512 d.lock.RLock() 513 _, isAlive := d.aliveLastTS[string(pkiID)] 514 lastDeadTS, isDead := d.deadLastTS[string(pkiID)] 515 d.lock.RUnlock() 516 517 if !isAlive && !isDead { 518 d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead) 519 return 520 } 521 522 if isAlive && isDead { 523 d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership) 524 return 525 } 526 527 if isDead { 528 if before(lastDeadTS, ts) { 529 // resurrect peer 530 d.resurrectMember(m, *ts) 531 } else if !same(lastDeadTS, ts) { 532 d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts) 533 } 534 return 535 } 536 537 d.lock.RLock() 538 lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)] 539 d.lock.RUnlock() 540 541 if isAlive { 542 if before(lastAliveTS, ts) { 543 d.learnExistingMembers([]*proto.SignedGossipMessage{m}) 544 } else if !same(lastAliveTS, ts) { 545 d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts) 546 } 547 548 } 549 // else, ignore the message because it is too old 550 } 551 552 func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) { 553 d.logger.Info("Entering, AliveMessage:", am, "t:", t) 554 defer d.logger.Info("Exiting") 555 d.lock.Lock() 556 defer d.lock.Unlock() 557 558 member := am.GetAliveMsg().Membership 559 pkiID := member.PkiId 560 d.aliveLastTS[string(pkiID)] = ×tamp{ 561 lastSeen: time.Now(), 562 seqNum: t.SeqNum, 563 incTime: tsToTime(t.IncNum), 564 } 565 566 var internalEndpoint string 567 if prevNetMem := d.id2Member[string(pkiID)]; prevNetMem != nil { 568 internalEndpoint = prevNetMem.InternalEndpoint 569 } 570 if am.Envelope.SecretEnvelope != nil { 571 internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint() 572 } 573 574 d.id2Member[string(pkiID)] = &NetworkMember{ 575 Endpoint: member.Endpoint, 576 Metadata: member.Metadata, 577 PKIid: member.PkiId, 578 InternalEndpoint: internalEndpoint, 579 } 580 581 delete(d.deadLastTS, string(pkiID)) 582 d.deadMembership.Remove(common.PKIidType(pkiID)) 583 d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 584 } 585 586 func (d *gossipDiscoveryImpl) periodicalReconnectToDead() { 587 defer d.logger.Debug("Stopped") 588 589 for !d.toDie() { 590 wg := &sync.WaitGroup{} 591 592 for _, member := range d.copyLastSeen(d.deadLastTS) { 593 wg.Add(1) 594 go func(member NetworkMember) { 595 defer wg.Done() 596 if d.comm.Ping(&member) { 597 d.logger.Debug(member, "is responding, sending membership request") 598 d.sendMembershipRequest(&member, true) 599 } else { 600 d.logger.Debug(member, "is still dead") 601 } 602 }(member) 603 } 604 605 wg.Wait() 606 d.logger.Debug("Sleeping", getReconnectInterval()) 607 time.Sleep(getReconnectInterval()) 608 } 609 } 610 611 func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember, includeInternalEndpoint bool) { 612 m, err := d.createMembershipRequest(includeInternalEndpoint) 613 if err != nil { 614 d.logger.Warning("Failed creating membership request:", err) 615 return 616 } 617 req, err := m.NoopSign() 618 if err != nil { 619 d.logger.Error("Failed creating SignedGossipMessage:", err) 620 return 621 } 622 d.comm.SendToPeer(member, req) 623 } 624 625 func (d *gossipDiscoveryImpl) createMembershipRequest(includeInternalEndpoint bool) (*proto.GossipMessage, error) { 626 am, err := d.createAliveMessage(includeInternalEndpoint) 627 if err != nil { 628 return nil, err 629 } 630 req := &proto.MembershipRequest{ 631 SelfInformation: am.Envelope, 632 // TODO: sending the known peers is not secure because the remote peer might shouldn't know 633 // TODO: about the known peers. I'm deprecating this until a secure mechanism will be implemented. 634 // TODO: See FAB-2570 for tracking this issue. 635 Known: [][]byte{}, 636 } 637 return &proto.GossipMessage{ 638 Tag: proto.GossipMessage_EMPTY, 639 Nonce: uint64(0), 640 Content: &proto.GossipMessage_MemReq{ 641 MemReq: req, 642 }, 643 }, nil 644 } 645 646 func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember { 647 d.lock.RLock() 648 defer d.lock.RUnlock() 649 650 res := []NetworkMember{} 651 for pkiIDStr := range lastSeenMap { 652 res = append(res, *(d.id2Member[pkiIDStr])) 653 } 654 return res 655 } 656 657 func (d *gossipDiscoveryImpl) periodicalCheckAlive() { 658 defer d.logger.Debug("Stopped") 659 660 for !d.toDie() { 661 time.Sleep(getAliveExpirationCheckInterval()) 662 dead := d.getDeadMembers() 663 if len(dead) > 0 { 664 d.logger.Debugf("Got %v dead members: %v", len(dead), dead) 665 d.expireDeadMembers(dead) 666 } 667 } 668 } 669 670 func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) { 671 d.logger.Warning("Entering", dead) 672 defer d.logger.Warning("Exiting") 673 674 var deadMembers2Expire []*NetworkMember 675 676 d.lock.Lock() 677 678 for _, pkiID := range dead { 679 if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive { 680 continue 681 } 682 deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)]) 683 // move lastTS from alive to dead 684 lastTS, hasLastTS := d.aliveLastTS[string(pkiID)] 685 if hasLastTS { 686 d.deadLastTS[string(pkiID)] = lastTS 687 delete(d.aliveLastTS, string(pkiID)) 688 } 689 690 if am := d.aliveMembership.MsgByID(pkiID); am != nil { 691 d.deadMembership.Put(pkiID, am) 692 d.aliveMembership.Remove(pkiID) 693 } 694 } 695 696 d.lock.Unlock() 697 698 for _, member2Expire := range deadMembers2Expire { 699 d.logger.Warning("Closing connection to", member2Expire) 700 d.comm.CloseConn(member2Expire) 701 } 702 } 703 704 func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType { 705 d.lock.RLock() 706 defer d.lock.RUnlock() 707 708 dead := []common.PKIidType{} 709 for id, last := range d.aliveLastTS { 710 elapsedNonAliveTime := time.Since(last.lastSeen) 711 if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() { 712 d.logger.Warning("Haven't heard from", []byte(id), "for", elapsedNonAliveTime) 713 dead = append(dead, common.PKIidType(id)) 714 } 715 } 716 return dead 717 } 718 719 func (d *gossipDiscoveryImpl) periodicalSendAlive() { 720 defer d.logger.Debug("Stopped") 721 722 for !d.toDie() { 723 d.logger.Debug("Sleeping", getAliveTimeInterval()) 724 time.Sleep(getAliveTimeInterval()) 725 msg, err := d.createAliveMessage(true) 726 if err != nil { 727 d.logger.Warning("Failed creating alive message:", err) 728 return 729 } 730 d.comm.Gossip(msg) 731 } 732 } 733 734 func (d *gossipDiscoveryImpl) createAliveMessage(includeInternalEndpoint bool) (*proto.SignedGossipMessage, error) { 735 d.lock.Lock() 736 d.seqNum++ 737 seqNum := d.seqNum 738 739 endpoint := d.self.Endpoint 740 meta := d.self.Metadata 741 pkiID := d.self.PKIid 742 internalEndpoint := d.self.InternalEndpoint 743 744 d.lock.Unlock() 745 746 msg2Gossip := &proto.GossipMessage{ 747 Tag: proto.GossipMessage_EMPTY, 748 Content: &proto.GossipMessage_AliveMsg{ 749 AliveMsg: &proto.AliveMessage{ 750 Membership: &proto.Member{ 751 Endpoint: endpoint, 752 Metadata: meta, 753 PkiId: pkiID, 754 }, 755 Timestamp: &proto.PeerTime{ 756 IncNum: uint64(d.incTime), 757 SeqNum: seqNum, 758 }, 759 }, 760 }, 761 } 762 763 envp := d.crypt.SignMessage(msg2Gossip, internalEndpoint) 764 if envp == nil { 765 return nil, errors.New("Failed signing message") 766 } 767 signedMsg := &proto.SignedGossipMessage{ 768 GossipMessage: msg2Gossip, 769 Envelope: envp, 770 } 771 772 if !includeInternalEndpoint { 773 signedMsg.Envelope.SecretEnvelope = nil 774 } 775 776 return signedMsg, nil 777 } 778 779 func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) { 780 d.logger.Debugf("Entering: learnedMembers={%v}", aliveArr) 781 defer d.logger.Debug("Exiting") 782 783 d.lock.Lock() 784 defer d.lock.Unlock() 785 786 for _, m := range aliveArr { 787 am := m.GetAliveMsg() 788 if m == nil { 789 d.logger.Warning("Expected alive message, got instead:", m) 790 return 791 } 792 d.logger.Debug("updating", am) 793 794 var internalEndpoint string 795 if prevNetMem := d.id2Member[string(am.Membership.PkiId)]; prevNetMem != nil { 796 internalEndpoint = prevNetMem.InternalEndpoint 797 } 798 if m.Envelope.SecretEnvelope != nil { 799 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 800 } 801 802 // update member's data 803 member := d.id2Member[string(am.Membership.PkiId)] 804 member.Endpoint = am.Membership.Endpoint 805 member.Metadata = am.Membership.Metadata 806 member.InternalEndpoint = internalEndpoint 807 808 if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiId)]; isKnownAsDead { 809 d.logger.Warning(am.Membership, "has already expired") 810 continue 811 } 812 813 if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiId)]; !isKnownAsAlive { 814 d.logger.Warning(am.Membership, "has already expired") 815 continue 816 } else { 817 d.logger.Debug("Updating aliveness data:", am) 818 // update existing aliveness data 819 alive := d.aliveLastTS[string(am.Membership.PkiId)] 820 alive.incTime = tsToTime(am.Timestamp.IncNum) 821 alive.lastSeen = time.Now() 822 alive.seqNum = am.Timestamp.SeqNum 823 824 if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiId); am == nil { 825 d.logger.Debug("Adding", am, "to aliveMembership") 826 msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope} 827 d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiId, msg) 828 } else { 829 d.logger.Debug("Replacing", am, "in aliveMembership") 830 am.GossipMessage = m.GossipMessage 831 am.Envelope = m.Envelope 832 } 833 } 834 } 835 } 836 837 func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) { 838 d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers) 839 defer d.logger.Debugf("Exiting") 840 841 d.lock.Lock() 842 defer d.lock.Unlock() 843 844 for _, am := range aliveMembers { 845 if equalPKIid(am.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 846 continue 847 } 848 d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiId)] = ×tamp{ 849 incTime: tsToTime(am.GetAliveMsg().Timestamp.IncNum), 850 lastSeen: time.Now(), 851 seqNum: am.GetAliveMsg().Timestamp.SeqNum, 852 } 853 854 d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 855 d.logger.Debugf("Learned about a new alive member: %v", am) 856 } 857 858 for _, dm := range deadMembers { 859 if equalPKIid(dm.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 860 continue 861 } 862 d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiId)] = ×tamp{ 863 incTime: tsToTime(dm.GetAliveMsg().Timestamp.IncNum), 864 lastSeen: time.Now(), 865 seqNum: dm.GetAliveMsg().Timestamp.SeqNum, 866 } 867 868 d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope}) 869 d.logger.Debugf("Learned about a new dead member: %v", dm) 870 } 871 872 // update the member in any case 873 for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} { 874 for _, m := range a { 875 member := m.GetAliveMsg() 876 if member == nil { 877 d.logger.Warning("Expected alive message, got instead:", m) 878 return 879 } 880 881 var internalEndpoint string 882 if m.Envelope.SecretEnvelope != nil { 883 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 884 } 885 886 if prevNetMem := d.id2Member[string(member.Membership.PkiId)]; prevNetMem != nil { 887 internalEndpoint = prevNetMem.InternalEndpoint 888 } 889 890 d.id2Member[string(member.Membership.PkiId)] = &NetworkMember{ 891 Endpoint: member.Membership.Endpoint, 892 Metadata: member.Membership.Metadata, 893 PKIid: member.Membership.PkiId, 894 InternalEndpoint: internalEndpoint, 895 } 896 } 897 } 898 } 899 900 func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember { 901 if d.toDie() { 902 return []NetworkMember{} 903 } 904 d.lock.RLock() 905 defer d.lock.RUnlock() 906 907 response := []NetworkMember{} 908 for _, m := range d.aliveMembership.ToSlice() { 909 member := m.GetAliveMsg() 910 response = append(response, NetworkMember{ 911 PKIid: member.Membership.PkiId, 912 Endpoint: member.Membership.Endpoint, 913 Metadata: member.Membership.Metadata, 914 InternalEndpoint: d.id2Member[string(m.GetAliveMsg().Membership.PkiId)].InternalEndpoint, 915 }) 916 } 917 return response 918 919 } 920 921 func tsToTime(ts uint64) time.Time { 922 return time.Unix(int64(0), int64(ts)) 923 } 924 925 func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) { 926 d.lock.Lock() 927 defer d.lock.Unlock() 928 d.self.Metadata = md 929 } 930 931 func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) { 932 d.lock.Lock() 933 defer d.lock.Unlock() 934 935 d.self.Endpoint = endpoint 936 } 937 938 func (d *gossipDiscoveryImpl) Self() NetworkMember { 939 return NetworkMember{ 940 Endpoint: d.self.Endpoint, 941 Metadata: d.self.Metadata, 942 PKIid: d.self.PKIid, 943 InternalEndpoint: d.self.InternalEndpoint, 944 } 945 } 946 947 func (d *gossipDiscoveryImpl) toDie() bool { 948 toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1) 949 return toDie 950 } 951 952 func (d *gossipDiscoveryImpl) Stop() { 953 defer d.logger.Info("Stopped") 954 d.logger.Info("Stopping") 955 atomic.StoreInt32(&d.toDieFlag, int32(1)) 956 d.msgStore.Stop() 957 d.toDieChan <- struct{}{} 958 } 959 960 func equalPKIid(a, b common.PKIidType) bool { 961 return bytes.Equal(a, b) 962 } 963 964 func same(a *timestamp, b *proto.PeerTime) bool { 965 return uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum == b.SeqNum 966 } 967 968 func before(a *timestamp, b *proto.PeerTime) bool { 969 return (uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum < b.SeqNum) || 970 uint64(a.incTime.UnixNano()) < b.IncNum 971 } 972 973 func getAliveTimeInterval() time.Duration { 974 return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval) 975 } 976 977 func getAliveExpirationTimeout() time.Duration { 978 return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval()) 979 } 980 981 func getAliveExpirationCheckInterval() time.Duration { 982 if aliveExpirationCheckInterval != 0 { 983 return aliveExpirationCheckInterval 984 } 985 986 return time.Duration(getAliveExpirationTimeout() / 10) 987 } 988 989 func getReconnectInterval() time.Duration { 990 return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout()) 991 } 992 993 type aliveMsgStore struct { 994 msgstore.MessageStore 995 } 996 997 func newAliveMsgStore(d *gossipDiscoveryImpl) *aliveMsgStore { 998 policy := proto.NewGossipMessageComparator(0) 999 trigger := func(m interface{}) {} 1000 aliveMsgTTL := getAliveExpirationTimeout() * msgExpirationFactor 1001 externalLock := func() { d.lock.Lock() } 1002 externalUnlock := func() { d.lock.Unlock() } 1003 callback := func(m interface{}) { 1004 msg := m.(*proto.SignedGossipMessage) 1005 if !msg.IsAliveMsg() { 1006 return 1007 } 1008 id := msg.GetAliveMsg().Membership.PkiId 1009 d.aliveMembership.Remove(id) 1010 d.deadMembership.Remove(id) 1011 delete(d.id2Member, string(id)) 1012 delete(d.deadLastTS, string(id)) 1013 delete(d.aliveLastTS, string(id)) 1014 } 1015 1016 s := &aliveMsgStore{ 1017 MessageStore: msgstore.NewMessageStoreExpirable(policy, trigger, aliveMsgTTL, externalLock, externalUnlock, callback), 1018 } 1019 return s 1020 } 1021 1022 func (s *aliveMsgStore) Add(msg interface{}) bool { 1023 if !msg.(*proto.SignedGossipMessage).IsAliveMsg() { 1024 panic(fmt.Sprint("Msg ", msg, " is not AliveMsg")) 1025 } 1026 return s.MessageStore.Add(msg) 1027 } 1028 1029 func (s *aliveMsgStore) CheckValid(msg interface{}) bool { 1030 if !msg.(*proto.SignedGossipMessage).IsAliveMsg() { 1031 panic(fmt.Sprint("Msg ", msg, " is not AliveMsg")) 1032 } 1033 return s.MessageStore.CheckValid(msg) 1034 }